1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0" 10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 if (!pcbddc->benign_sf) { 17 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 18 if (pcbddc->benign_p0_gidx >= 0) { 19 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 20 } else { 21 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 22 } 23 } 24 if (pop) { /* use SF to get values */ 25 PetscScalar *array; 26 27 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 28 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 29 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 30 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 31 } else { /* use VecSetValue */ 32 if (pcbddc->benign_p0_gidx >= 0) { 33 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 34 } 35 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 36 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 43 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 44 { 45 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 46 PetscErrorCode ierr; 47 48 PetscFunctionBegin; 49 /* TODO: add error checking 50 - avoid nested pop (or push) calls. 51 - cannot push before pop. 52 - cannot call this if pcbddc->local_mat is NULL 53 */ 54 if (pcbddc->benign_p0_lidx < 0) { 55 PetscFunctionReturn(0); 56 } 57 if (pop) { 58 const PetscInt *cB0_cols; 59 PetscInt cB0_ncol; 60 const PetscScalar *cB0_vals; 61 62 /* extract B_0 */ 63 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 64 pcbddc->B0_ncol = cB0_ncol; 65 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 66 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 67 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 68 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 69 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 70 /* remove rows and cols from local problem */ 71 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 72 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 73 } else { /* push */ 74 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 75 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 76 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 77 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 78 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 79 } 80 PetscFunctionReturn(0); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "PCBDDCAdaptiveSelection" 85 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 86 { 87 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 88 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 89 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 90 PetscBLASInt *B_iwork,*B_ifail; 91 PetscScalar *work,lwork; 92 PetscScalar *St,*S,*eigv; 93 PetscScalar *Sarray,*Starray; 94 PetscReal *eigs,thresh; 95 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 96 PetscBool allocated_S_St; 97 #if defined(PETSC_USE_COMPLEX) 98 PetscReal *rwork; 99 #endif 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 if (!sub_schurs->use_mumps) { 104 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 105 } 106 107 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 108 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 109 } 110 111 if (pcbddc->dbg_flag) { 112 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 114 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 115 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 116 } 117 118 if (pcbddc->dbg_flag) { 119 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 120 } 121 122 /* max size of subsets */ 123 mss = 0; 124 for (i=0;i<sub_schurs->n_subs;i++) { 125 PetscInt subset_size; 126 127 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 128 mss = PetscMax(mss,subset_size); 129 } 130 131 /* min/max and threshold */ 132 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 133 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 134 nmax = PetscMax(nmin,nmax); 135 allocated_S_St = PETSC_FALSE; 136 if (nmin) { 137 allocated_S_St = PETSC_TRUE; 138 } 139 140 /* allocate lapack workspace */ 141 cum = cum2 = 0; 142 maxneigs = 0; 143 for (i=0;i<sub_schurs->n_subs;i++) { 144 PetscInt n,subset_size; 145 146 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 147 n = PetscMin(subset_size,nmax); 148 cum += subset_size; 149 cum2 += subset_size*n; 150 maxneigs = PetscMax(maxneigs,n); 151 } 152 if (mss) { 153 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 154 PetscBLASInt B_itype = 1; 155 PetscBLASInt B_N = mss; 156 PetscReal zero = 0.0; 157 PetscReal eps = 0.0; /* dlamch? */ 158 159 B_lwork = -1; 160 S = NULL; 161 St = NULL; 162 eigs = NULL; 163 eigv = NULL; 164 B_iwork = NULL; 165 B_ifail = NULL; 166 #if defined(PETSC_USE_COMPLEX) 167 rwork = NULL; 168 #endif 169 thresh = 1.0; 170 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 171 #if defined(PETSC_USE_COMPLEX) 172 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 173 #else 174 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 175 #endif 176 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 177 ierr = PetscFPTrapPop();CHKERRQ(ierr); 178 } else { 179 /* TODO */ 180 } 181 } else { 182 lwork = 0; 183 } 184 185 nv = 0; 186 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 187 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 188 } 189 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 190 if (allocated_S_St) { 191 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 192 } 193 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 194 #if defined(PETSC_USE_COMPLEX) 195 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 196 #endif 197 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 198 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 199 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 200 nv+cum,&pcbddc->adaptive_constraints_idxs, 201 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 202 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 203 204 maxneigs = 0; 205 cum = cum2 = cumarray = 0; 206 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 207 pcbddc->adaptive_constraints_data_ptr[0] = 0; 208 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 209 const PetscInt *idxs; 210 211 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 212 for (cum=0;cum<nv;cum++) { 213 pcbddc->adaptive_constraints_n[cum] = 1; 214 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 215 pcbddc->adaptive_constraints_data[cum] = 1.0; 216 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 217 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 218 } 219 cum2 = cum; 220 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 221 } 222 223 if (mss) { /* multilevel */ 224 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 225 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 226 } 227 228 for (i=0;i<sub_schurs->n_subs;i++) { 229 230 const PetscInt *idxs; 231 PetscReal infty = PETSC_MAX_REAL; 232 PetscInt j,subset_size,eigs_start = 0; 233 PetscBLASInt B_N; 234 PetscBool same_data = PETSC_FALSE; 235 236 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 237 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 238 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 239 if (sub_schurs->is_hermitian) { 240 PetscInt j,k; 241 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 242 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 244 } 245 for (j=0;j<subset_size;j++) { 246 for (k=j;k<subset_size;k++) { 247 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 248 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 249 } 250 } 251 } else { 252 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 254 } 255 } else { 256 S = Sarray + cumarray; 257 St = Starray + cumarray; 258 } 259 260 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 261 /* see if we can save some work */ 262 if (sub_schurs->n_subs == 1) { 263 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 264 } 265 266 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 267 B_neigs = 0; 268 } else { 269 /* Threshold: this is an heuristic for edges */ 270 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 271 272 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 273 PetscBLASInt B_itype = 1; 274 PetscBLASInt B_IL, B_IU; 275 PetscReal eps = -1.0; /* dlamch? */ 276 PetscInt nmin_s; 277 278 /* ask for eigenvalues larger than thresh */ 279 if (pcbddc->dbg_flag) { 280 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 281 } 282 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 283 #if defined(PETSC_USE_COMPLEX) 284 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 285 #else 286 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 287 #endif 288 ierr = PetscFPTrapPop();CHKERRQ(ierr); 289 if (B_ierr) { 290 if (B_ierr < 0 ) { 291 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 292 } else if (B_ierr <= B_N) { 293 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 294 } else { 295 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 296 } 297 } 298 299 if (B_neigs > nmax) { 300 if (pcbddc->dbg_flag) { 301 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 302 } 303 eigs_start = B_neigs -nmax; 304 B_neigs = nmax; 305 } 306 307 nmin_s = PetscMin(nmin,B_N); 308 if (B_neigs < nmin_s) { 309 PetscBLASInt B_neigs2; 310 311 B_IU = B_N - B_neigs; 312 B_IL = B_N - nmin_s + 1; 313 if (pcbddc->dbg_flag) { 314 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 315 } 316 if (sub_schurs->is_hermitian) { 317 PetscInt j; 318 for (j=0;j<subset_size;j++) { 319 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 320 } 321 for (j=0;j<subset_size;j++) { 322 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 323 } 324 } else { 325 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 326 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 327 } 328 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 329 #if defined(PETSC_USE_COMPLEX) 330 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 331 #else 332 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 333 #endif 334 ierr = PetscFPTrapPop();CHKERRQ(ierr); 335 B_neigs += B_neigs2; 336 } 337 if (B_ierr) { 338 if (B_ierr < 0 ) { 339 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 340 } else if (B_ierr <= B_N) { 341 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 342 } else { 343 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 344 } 345 } 346 if (pcbddc->dbg_flag) { 347 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 348 for (j=0;j<B_neigs;j++) { 349 if (eigs[j] == 0.0) { 350 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 351 } else { 352 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 353 } 354 } 355 } 356 } else { 357 /* TODO */ 358 } 359 } 360 maxneigs = PetscMax(B_neigs,maxneigs); 361 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 362 if (B_neigs) { 363 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 364 365 if (pcbddc->dbg_flag > 1) { 366 PetscInt ii; 367 for (ii=0;ii<B_neigs;ii++) { 368 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 369 for (j=0;j<B_N;j++) { 370 #if defined(PETSC_USE_COMPLEX) 371 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 372 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 373 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 374 #else 375 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 376 #endif 377 } 378 } 379 } 380 #if 0 381 for (j=0;j<B_neigs;j++) { 382 PetscBLASInt Blas_N,Blas_one = 1.0; 383 PetscScalar norm; 384 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 385 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 386 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 387 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 388 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 389 } else { 390 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 391 } 392 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 393 } 394 #endif 395 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 396 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 397 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 398 cum++; 399 } 400 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 401 /* shift for next computation */ 402 cumarray += subset_size*subset_size; 403 } 404 if (pcbddc->dbg_flag) { 405 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 406 } 407 408 if (mss) { 409 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 410 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 411 /* destroy matrices (junk) */ 412 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 413 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 414 } 415 if (allocated_S_St) { 416 ierr = PetscFree2(S,St);CHKERRQ(ierr); 417 } 418 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 419 #if defined(PETSC_USE_COMPLEX) 420 ierr = PetscFree(rwork);CHKERRQ(ierr); 421 #endif 422 if (pcbddc->dbg_flag) { 423 PetscInt maxneigs_r; 424 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 425 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 426 } 427 PetscFunctionReturn(0); 428 } 429 430 #undef __FUNCT__ 431 #define __FUNCT__ "PCBDDCSetUpSolvers" 432 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 433 { 434 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 435 PetscScalar *coarse_submat_vals; 436 PetscErrorCode ierr; 437 438 PetscFunctionBegin; 439 /* Setup local scatters R_to_B and (optionally) R_to_D */ 440 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 441 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 442 443 /* Setup local neumann solver ksp_R */ 444 /* PCBDDCSetUpLocalScatters should be called first! */ 445 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 446 447 /* Change global null space passed in by the user if change of basis has been requested */ 448 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 449 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 450 } 451 452 /* 453 Setup local correction and local part of coarse basis. 454 Gives back the dense local part of the coarse matrix in column major ordering 455 */ 456 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 457 458 /* Compute total number of coarse nodes and setup coarse solver */ 459 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 460 461 /* free */ 462 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "PCBDDCResetCustomization" 468 PetscErrorCode PCBDDCResetCustomization(PC pc) 469 { 470 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 471 PetscErrorCode ierr; 472 473 PetscFunctionBegin; 474 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 475 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 476 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 477 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 478 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 479 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 480 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 481 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 482 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 483 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 484 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "PCBDDCResetTopography" 490 PetscErrorCode PCBDDCResetTopography(PC pc) 491 { 492 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 493 PetscErrorCode ierr; 494 495 PetscFunctionBegin; 496 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 497 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 498 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 499 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 500 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "PCBDDCResetSolvers" 506 PetscErrorCode PCBDDCResetSolvers(PC pc) 507 { 508 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 509 PetscScalar *array; 510 PetscErrorCode ierr; 511 512 PetscFunctionBegin; 513 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 514 if (pcbddc->coarse_phi_B) { 515 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 516 ierr = PetscFree(array);CHKERRQ(ierr); 517 } 518 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 519 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 520 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 521 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 522 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 523 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 524 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 525 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 526 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 527 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 528 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 529 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 530 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 531 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 532 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 533 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 534 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 535 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 536 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 537 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 538 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 539 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 540 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 541 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 542 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 543 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 544 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 545 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 546 PetscFunctionReturn(0); 547 } 548 549 #undef __FUNCT__ 550 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 551 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 552 { 553 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 554 PC_IS *pcis = (PC_IS*)pc->data; 555 VecType impVecType; 556 PetscInt n_constraints,n_R,old_size,n_benign; 557 PetscErrorCode ierr; 558 559 PetscFunctionBegin; 560 if (!pcbddc->ConstraintMatrix) { 561 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 562 } 563 /* get sizes */ 564 n_benign = 0; 565 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 566 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 567 if (n_benign && n_constraints) { 568 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis"); 569 } 570 n_R = pcis->n - pcbddc->n_vertices; 571 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 572 /* local work vectors (try to avoid unneeded work)*/ 573 /* R nodes */ 574 old_size = -1; 575 if (pcbddc->vec1_R) { 576 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 577 } 578 if (n_R != old_size) { 579 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 580 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 581 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 582 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 583 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 584 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 585 } 586 /* local primal dofs */ 587 old_size = -1; 588 if (pcbddc->vec1_P) { 589 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 590 } 591 if (pcbddc->local_primal_size != old_size) { 592 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 593 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 594 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 595 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 596 } 597 /* local explicit constraints */ 598 old_size = -1; 599 if (pcbddc->vec1_C) { 600 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 601 } 602 if (n_constraints && n_constraints != old_size) { 603 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 604 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 605 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 606 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 607 } 608 PetscFunctionReturn(0); 609 } 610 611 #undef __FUNCT__ 612 #define __FUNCT__ "PCBDDCSetUpCorrection" 613 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 614 { 615 PetscErrorCode ierr; 616 /* pointers to pcis and pcbddc */ 617 PC_IS* pcis = (PC_IS*)pc->data; 618 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 619 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 620 /* submatrices of local problem */ 621 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 622 /* submatrices of benign trick */ 623 Mat B0_R,B0_V; 624 /* submatrices of local coarse problem */ 625 Mat S_VV,S_CV,S_VC,S_CC; 626 /* working matrices */ 627 Mat C_CR; 628 /* additional working stuff */ 629 PC pc_R; 630 Mat F,B0 = NULL; 631 Vec vec_R_benign,vec_R_benign_2; 632 PetscBool isLU,isCHOL,isILU; 633 634 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 635 PetscScalar *work; 636 PetscInt *idx_V_B; 637 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 638 PetscInt i,n_R,n_D,n_B; 639 PetscBool unsymmetric_check; 640 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 641 MatType impMatType; 642 /* some shortcuts to scalars */ 643 PetscScalar one=1.0,m_one=-1.0; 644 645 PetscFunctionBegin; 646 n_benign = 0; 647 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 648 n_vertices = pcbddc->n_vertices; 649 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 650 /* Set Non-overlapping dimensions */ 651 n_B = pcis->n_B; 652 n_D = pcis->n - n_B; 653 n_R = pcis->n - n_vertices; 654 655 /* Set types for local objects needed by BDDC precondtioner */ 656 impMatType = MATSEQDENSE; 657 658 /* vertices in boundary numbering */ 659 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 660 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 661 if (i != n_vertices) { 662 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 663 } 664 665 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 666 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 667 if (!n_benign) { 668 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 669 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 671 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 672 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 673 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 674 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 675 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 676 } else { /* these wrappers are not needed with the benign trick */ 677 S_VV = S_CV = S_VC = S_CC = NULL; 678 } 679 680 unsymmetric_check = PETSC_FALSE; 681 /* allocate workspace */ 682 n = 0; 683 if (n_constraints) { 684 n += n_R*n_constraints; 685 } 686 if (n_vertices) { 687 n = PetscMax(2*n_R*n_vertices,n); 688 n = PetscMax((n_R+n_B)*n_vertices,n); 689 } 690 if (!pcbddc->symmetric_primal) { 691 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 692 unsymmetric_check = PETSC_TRUE; 693 } 694 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 695 696 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 697 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 698 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 699 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 700 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 701 if (isLU || isILU || isCHOL) { 702 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 703 } else if (sub_schurs->reuse_mumps) { 704 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 705 MatFactorType type; 706 707 F = reuse_mumps->F; 708 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 709 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 710 } else { 711 F = NULL; 712 } 713 714 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 715 if (n_constraints) { 716 Mat M1,M2,M3; 717 Mat auxmat; 718 IS is_aux; 719 PetscScalar *array,*array2; 720 721 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 722 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 723 724 /* Extract constraints on R nodes: C_{CR} */ 725 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 726 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 727 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 728 729 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 730 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 731 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 732 for (i=0;i<n_constraints;i++) { 733 const PetscScalar *row_cmat_values; 734 const PetscInt *row_cmat_indices; 735 PetscInt size_of_constraint,j; 736 737 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 738 for (j=0;j<size_of_constraint;j++) { 739 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 740 } 741 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 742 } 743 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 744 if (F) { 745 Mat B; 746 747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 748 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 749 ierr = MatDestroy(&B);CHKERRQ(ierr); 750 } else { 751 PetscScalar *marr; 752 753 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 754 for (i=0;i<n_constraints;i++) { 755 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 756 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 757 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 758 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 759 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 760 } 761 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 762 } 763 if (!pcbddc->switch_static) { 764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 765 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 766 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 767 for (i=0;i<n_constraints;i++) { 768 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 769 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 770 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 771 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 772 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 773 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 774 } 775 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 776 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 777 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 778 } else { 779 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 780 pcbddc->local_auxmat2 = local_auxmat2_R; 781 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 782 } 783 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 784 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 785 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 786 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 787 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 788 if (isCHOL) { 789 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 790 } else { 791 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 792 } 793 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 794 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 795 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 796 ierr = MatDestroy(&M2);CHKERRQ(ierr); 797 ierr = MatDestroy(&M3);CHKERRQ(ierr); 798 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 799 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 800 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 801 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 802 ierr = MatDestroy(&M1);CHKERRQ(ierr); 803 } 804 /* Get submatrices from subdomain matrix */ 805 if (n_vertices) { 806 IS is_aux; 807 808 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 809 IS tis; 810 811 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 812 ierr = ISSort(tis);CHKERRQ(ierr); 813 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 814 ierr = ISDestroy(&tis);CHKERRQ(ierr); 815 } else { 816 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 817 } 818 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 819 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 820 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 821 if (n_benign) { 822 IS dummy; 823 PetscInt ii[2]; 824 825 ii[0] = 0; 826 ii[1] = pcbddc->B0_ncol; 827 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 828 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 829 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 830 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 831 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 832 } 833 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 834 } 835 836 /* Matrix of coarse basis functions (local) */ 837 if (pcbddc->coarse_phi_B) { 838 PetscInt on_B,on_primal,on_D=n_D; 839 if (pcbddc->coarse_phi_D) { 840 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 841 } 842 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 843 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 844 PetscScalar *marray; 845 846 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 847 ierr = PetscFree(marray);CHKERRQ(ierr); 848 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 849 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 850 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 851 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 852 } 853 } 854 855 if (!pcbddc->coarse_phi_B) { 856 PetscScalar *marray; 857 858 n = n_B*pcbddc->local_primal_size; 859 if (pcbddc->switch_static || pcbddc->dbg_flag) { 860 n += n_D*pcbddc->local_primal_size; 861 } 862 if (!pcbddc->symmetric_primal) { 863 n *= 2; 864 } 865 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 866 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 867 n = n_B*pcbddc->local_primal_size; 868 if (pcbddc->switch_static || pcbddc->dbg_flag) { 869 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 870 n += n_D*pcbddc->local_primal_size; 871 } 872 if (!pcbddc->symmetric_primal) { 873 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 874 if (pcbddc->switch_static || pcbddc->dbg_flag) { 875 n = n_B*pcbddc->local_primal_size; 876 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 877 } 878 } else { 879 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 880 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 881 if (pcbddc->switch_static || pcbddc->dbg_flag) { 882 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 883 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 884 } 885 } 886 } 887 888 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 889 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 890 const PetscInt *idxs; 891 892 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 893 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 894 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 895 } 896 /* vertices */ 897 if (n_vertices) { 898 899 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 900 901 if (n_R) { 902 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 903 Mat S_V_benign; 904 PetscBLASInt B_N,B_one = 1; 905 PetscScalar *x,*y; 906 PetscBool isseqaij; 907 908 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 909 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 910 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 911 if (F) { /* TODO could be optimized for symmetric problems */ 912 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 913 } else { 914 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 915 for (i=0;i<n_vertices;i++) { 916 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 917 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 918 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 919 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 920 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 921 } 922 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 923 } 924 if (n_benign) { 925 const PetscScalar *vals; 926 const PetscInt *idxs; 927 PetscInt n; 928 929 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr); 930 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign_2);CHKERRQ(ierr); 931 ierr = VecSet(vec_R_benign_2,0.);CHKERRQ(ierr); 932 ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 933 ierr = VecSetValues(vec_R_benign_2,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 934 ierr = VecAssemblyBegin(vec_R_benign_2);CHKERRQ(ierr); 935 ierr = VecAssemblyEnd(vec_R_benign_2);CHKERRQ(ierr); 936 ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 937 ierr = KSPSolve(pcbddc->ksp_R,vec_R_benign_2,vec_R_benign);CHKERRQ(ierr); 938 ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr); 939 } 940 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 941 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 942 if (n_constraints) { 943 Mat B; 944 945 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 946 for (i=0;i<n_vertices;i++) { 947 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 948 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 949 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 951 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 952 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 953 } 954 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 955 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 956 ierr = MatDestroy(&B);CHKERRQ(ierr); 957 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 958 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 959 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 960 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 961 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 962 ierr = MatDestroy(&B);CHKERRQ(ierr); 963 } 964 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 965 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 966 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 967 } 968 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 969 if (n_benign) { 970 ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr); 971 } 972 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 973 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 974 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 975 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 976 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 977 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 978 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 979 if (!n_benign) { 980 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 981 } else { 982 const PetscScalar *vals; 983 PetscScalar val; 984 const PetscInt *idxs; 985 PetscInt n,j; 986 987 /* Use explicit loops to populate the local coarse matrix */ 988 ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr); 989 for (j=0;j<n_vertices;j++) { 990 for (i=0;i<n_vertices;i++) { 991 coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i]; 992 } 993 } 994 ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr); 995 ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr); 996 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 997 for (j=0;j<n_vertices;j++) { 998 coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j]; 999 coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j]; 1000 } 1001 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1002 ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr); 1003 ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr); 1004 ierr = VecDot(vec_R_benign_2,vec_R_benign,&val);CHKERRQ(ierr); 1005 coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = val; 1006 } 1007 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1008 } else { 1009 /* TODO benign case also here */ 1010 if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented"); 1011 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1012 } 1013 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1014 /* coarse basis functions */ 1015 for (i=0;i<n_vertices;i++) { 1016 PetscScalar *y; 1017 1018 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1019 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1020 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1021 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1022 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1023 y[n_B*i+idx_V_B[i]] = 1.0; 1024 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1025 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1026 1027 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1028 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1029 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1030 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1033 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1034 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1035 } 1036 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1037 } 1038 if (n_benign) { 1039 PetscScalar *y; 1040 1041 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1042 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr); 1043 ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1044 ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1045 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1046 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1047 1048 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1049 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1050 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*n_vertices);CHKERRQ(ierr); 1051 ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1054 y[n_D*n_vertices+p0_lidx_I] = 0.0; 1055 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1056 } 1057 } 1058 /* if n_R == 0 the object is not destroyed */ 1059 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1060 } 1061 1062 if (n_constraints) { 1063 Mat B; 1064 1065 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1066 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1067 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1068 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1069 if (n_vertices) { 1070 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1071 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1072 } else { 1073 Mat S_VCt; 1074 1075 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1076 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1077 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1078 } 1079 } 1080 ierr = MatDestroy(&B);CHKERRQ(ierr); 1081 /* coarse basis functions */ 1082 for (i=0;i<n_constraints;i++) { 1083 PetscScalar *y; 1084 1085 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1086 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1087 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1088 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1091 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1092 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1093 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1094 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1095 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1096 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1097 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1098 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1099 } 1100 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1101 } 1102 } 1103 if (n_constraints) { 1104 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1105 } 1106 1107 if (n_benign) { 1108 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1109 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1110 ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr); 1111 ierr = VecDestroy(&vec_R_benign_2);CHKERRQ(ierr); 1112 } 1113 1114 /* compute other basis functions for non-symmetric problems */ 1115 if (!pcbddc->symmetric_primal) { 1116 1117 if (n_constraints) { 1118 Mat S_CCT,B_C; 1119 1120 /* this is a lazy thing */ 1121 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1122 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1123 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1124 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1125 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1126 if (n_vertices) { 1127 Mat B_V,S_VCT; 1128 1129 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1130 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1131 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1132 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1133 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1134 } 1135 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1136 } else { /* if there are no constraints, reset work */ 1137 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1138 } 1139 if (n_vertices && n_R) { 1140 Mat A_VRT; 1141 PetscScalar *marray; 1142 PetscBLASInt B_N,B_one = 1; 1143 1144 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1145 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1146 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1147 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1148 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1149 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1150 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1151 } 1152 1153 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1154 for (i=0;i<pcbddc->local_primal_size;i++) { 1155 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1156 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1157 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1158 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1159 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1160 } 1161 } else { 1162 for (i=0;i<pcbddc->local_primal_size;i++) { 1163 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1164 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1165 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1166 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1167 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1168 } 1169 } 1170 /* coarse basis functions */ 1171 for (i=0;i<pcbddc->local_primal_size;i++) { 1172 PetscScalar *y; 1173 1174 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1175 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1176 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1177 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1178 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1179 if (i<n_vertices) { 1180 y[n_B*i+idx_V_B[i]] = 1.0; 1181 } 1182 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1183 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1184 1185 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1186 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1187 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1188 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1189 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1190 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1191 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1192 } 1193 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1194 } 1195 } 1196 /* free memory */ 1197 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1198 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1199 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1200 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1201 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1202 ierr = PetscFree(work);CHKERRQ(ierr); 1203 if (n_vertices) { 1204 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1205 } 1206 if (n_constraints) { 1207 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1208 } 1209 /* Checking coarse_sub_mat and coarse basis functios */ 1210 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1211 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1212 if (pcbddc->dbg_flag) { 1213 Mat coarse_sub_mat; 1214 Mat AUXMAT,TM1,TM2,TM3,TM4; 1215 Mat coarse_phi_D,coarse_phi_B; 1216 Mat coarse_psi_D,coarse_psi_B; 1217 Mat A_II,A_BB,A_IB,A_BI; 1218 Mat C_B,CPHI; 1219 IS is_dummy; 1220 Vec mones; 1221 MatType checkmattype=MATSEQAIJ; 1222 PetscReal real_value; 1223 1224 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1225 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1226 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1227 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1228 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1229 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1230 if (unsymmetric_check) { 1231 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1232 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1233 } 1234 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1235 1236 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1237 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1238 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1239 if (unsymmetric_check) { 1240 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1241 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1242 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1243 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1244 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1245 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1246 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1247 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1248 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1249 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1250 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1251 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1252 } else { 1253 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1254 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1255 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1256 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1257 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1258 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1259 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1260 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1261 } 1262 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1263 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1264 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1265 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1266 if (n_benign) { 1267 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1268 PetscScalar *data,*data2; 1269 1270 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1271 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1272 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1273 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1274 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1275 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1276 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1277 for (i=0;i<pcbddc->local_primal_size;i++) { 1278 data[n_vertices*pcbddc->local_primal_size+i] += data2[i]; 1279 data[i*pcbddc->local_primal_size+n_vertices] += data2[i]; 1280 } 1281 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1282 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1283 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1284 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1285 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1286 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1287 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1288 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1289 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1290 } 1291 #if 0 1292 { 1293 PetscViewer viewer; 1294 char filename[256]; 1295 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1296 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1297 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1298 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1299 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1300 } 1301 #endif 1302 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1303 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1304 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1306 1307 /* check constraints */ 1308 if (!n_benign) { 1309 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1310 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1311 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1312 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1313 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1314 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1315 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1317 if (unsymmetric_check) { 1318 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1319 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1320 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1321 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1322 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1323 } 1324 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1325 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1326 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1327 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1328 } 1329 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1330 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1331 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1332 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1333 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1334 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1335 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1336 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1337 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1338 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1339 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1340 if (unsymmetric_check) { 1341 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1342 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1343 } 1344 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1345 } 1346 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1347 /* get back data */ 1348 *coarse_submat_vals_n = coarse_submat_vals; 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1354 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1355 { 1356 Mat *work_mat; 1357 IS isrow_s,iscol_s; 1358 PetscBool rsorted,csorted; 1359 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1360 PetscErrorCode ierr; 1361 1362 PetscFunctionBegin; 1363 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1364 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1365 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1366 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1367 1368 if (!rsorted) { 1369 const PetscInt *idxs; 1370 PetscInt *idxs_sorted,i; 1371 1372 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1373 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1374 for (i=0;i<rsize;i++) { 1375 idxs_perm_r[i] = i; 1376 } 1377 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1378 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1379 for (i=0;i<rsize;i++) { 1380 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1381 } 1382 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1383 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1384 } else { 1385 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1386 isrow_s = isrow; 1387 } 1388 1389 if (!csorted) { 1390 if (isrow == iscol) { 1391 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1392 iscol_s = isrow_s; 1393 } else { 1394 const PetscInt *idxs; 1395 PetscInt *idxs_sorted,i; 1396 1397 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1398 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1399 for (i=0;i<csize;i++) { 1400 idxs_perm_c[i] = i; 1401 } 1402 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1403 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1404 for (i=0;i<csize;i++) { 1405 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1406 } 1407 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1408 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1409 } 1410 } else { 1411 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1412 iscol_s = iscol; 1413 } 1414 1415 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1416 1417 if (!rsorted || !csorted) { 1418 Mat new_mat; 1419 IS is_perm_r,is_perm_c; 1420 1421 if (!rsorted) { 1422 PetscInt *idxs_r,i; 1423 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1424 for (i=0;i<rsize;i++) { 1425 idxs_r[idxs_perm_r[i]] = i; 1426 } 1427 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1428 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1429 } else { 1430 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1431 } 1432 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1433 1434 if (!csorted) { 1435 if (isrow_s == iscol_s) { 1436 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1437 is_perm_c = is_perm_r; 1438 } else { 1439 PetscInt *idxs_c,i; 1440 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1441 for (i=0;i<csize;i++) { 1442 idxs_c[idxs_perm_c[i]] = i; 1443 } 1444 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1445 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1446 } 1447 } else { 1448 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1449 } 1450 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1451 1452 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1453 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1454 work_mat[0] = new_mat; 1455 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1456 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1457 } 1458 1459 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1460 *B = work_mat[0]; 1461 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1462 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1463 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1469 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1470 { 1471 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1472 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1473 Mat new_mat; 1474 IS is_local,is_global; 1475 PetscInt local_size; 1476 PetscBool isseqaij; 1477 PetscErrorCode ierr; 1478 1479 PetscFunctionBegin; 1480 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1481 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1483 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1484 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1485 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1486 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1487 1488 /* check */ 1489 if (pcbddc->dbg_flag) { 1490 Vec x,x_change; 1491 PetscReal error; 1492 1493 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1494 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1495 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1496 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1499 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1500 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1501 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1502 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1503 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1504 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1505 ierr = VecDestroy(&x);CHKERRQ(ierr); 1506 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1507 } 1508 1509 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1510 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1511 if (isseqaij) { 1512 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1513 } else { 1514 Mat work_mat; 1515 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1516 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1517 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1518 } 1519 if (matis->A->symmetric_set) { 1520 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1521 #if !defined(PETSC_USE_COMPLEX) 1522 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1523 #endif 1524 } 1525 /* 1526 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1527 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1528 */ 1529 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1535 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1536 { 1537 PC_IS* pcis = (PC_IS*)(pc->data); 1538 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1539 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1540 PetscInt *idx_R_local=NULL; 1541 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1542 PetscInt vbs,bs; 1543 PetscBT bitmask=NULL; 1544 PetscErrorCode ierr; 1545 1546 PetscFunctionBegin; 1547 /* 1548 No need to setup local scatters if 1549 - primal space is unchanged 1550 AND 1551 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1552 AND 1553 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1554 */ 1555 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1556 PetscFunctionReturn(0); 1557 } 1558 /* destroy old objects */ 1559 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1560 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1561 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1562 /* Set Non-overlapping dimensions */ 1563 n_B = pcis->n_B; 1564 n_D = pcis->n - n_B; 1565 n_vertices = pcbddc->n_vertices; 1566 1567 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1568 1569 /* create auxiliary bitmask and allocate workspace */ 1570 if (!sub_schurs->reuse_mumps) { 1571 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1572 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1573 for (i=0;i<n_vertices;i++) { 1574 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1575 } 1576 1577 for (i=0, n_R=0; i<pcis->n; i++) { 1578 if (!PetscBTLookup(bitmask,i)) { 1579 idx_R_local[n_R++] = i; 1580 } 1581 } 1582 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1583 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1584 1585 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1586 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1587 } 1588 1589 /* Block code */ 1590 vbs = 1; 1591 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1592 if (bs>1 && !(n_vertices%bs)) { 1593 PetscBool is_blocked = PETSC_TRUE; 1594 PetscInt *vary; 1595 if (!sub_schurs->reuse_mumps) { 1596 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1597 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1598 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1599 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1600 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1601 for (i=0; i<pcis->n/bs; i++) { 1602 if (vary[i]!=0 && vary[i]!=bs) { 1603 is_blocked = PETSC_FALSE; 1604 break; 1605 } 1606 } 1607 ierr = PetscFree(vary);CHKERRQ(ierr); 1608 } else { 1609 /* Verify directly the R set */ 1610 for (i=0; i<n_R/bs; i++) { 1611 PetscInt j,node=idx_R_local[bs*i]; 1612 for (j=1; j<bs; j++) { 1613 if (node != idx_R_local[bs*i+j]-j) { 1614 is_blocked = PETSC_FALSE; 1615 break; 1616 } 1617 } 1618 } 1619 } 1620 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1621 vbs = bs; 1622 for (i=0;i<n_R/vbs;i++) { 1623 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1624 } 1625 } 1626 } 1627 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1628 if (sub_schurs->reuse_mumps) { 1629 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1630 1631 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1632 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1633 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1634 reuse_mumps->is_R = pcbddc->is_R_local; 1635 } else { 1636 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1637 } 1638 1639 /* print some info if requested */ 1640 if (pcbddc->dbg_flag) { 1641 PetscInt benign = 0; 1642 1643 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1644 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1645 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1646 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1649 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1650 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1651 } 1652 1653 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1654 if (!sub_schurs->reuse_mumps) { 1655 IS is_aux1,is_aux2; 1656 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1657 1658 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1659 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1660 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1661 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1662 for (i=0; i<n_D; i++) { 1663 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1664 } 1665 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1666 for (i=0, j=0; i<n_R; i++) { 1667 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1668 aux_array1[j++] = i; 1669 } 1670 } 1671 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1672 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1673 for (i=0, j=0; i<n_B; i++) { 1674 if (!PetscBTLookup(bitmask,is_indices[i])) { 1675 aux_array2[j++] = i; 1676 } 1677 } 1678 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1679 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1680 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1681 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1682 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1683 1684 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1685 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1686 for (i=0, j=0; i<n_R; i++) { 1687 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1688 aux_array1[j++] = i; 1689 } 1690 } 1691 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1692 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1693 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1694 } 1695 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1696 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1697 } else { 1698 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1699 IS tis; 1700 PetscInt schur_size; 1701 1702 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1703 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1704 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1705 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1707 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1708 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1709 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1710 } 1711 } 1712 PetscFunctionReturn(0); 1713 } 1714 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1718 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1719 { 1720 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1721 PC_IS *pcis = (PC_IS*)pc->data; 1722 PC pc_temp; 1723 Mat A_RR; 1724 MatReuse reuse; 1725 PetscScalar m_one = -1.0; 1726 PetscReal value; 1727 PetscInt n_D,n_R; 1728 PetscBool use_exact,use_exact_reduced,issbaij; 1729 PetscErrorCode ierr; 1730 /* prefixes stuff */ 1731 char dir_prefix[256],neu_prefix[256],str_level[16]; 1732 size_t len; 1733 1734 PetscFunctionBegin; 1735 1736 /* compute prefixes */ 1737 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1738 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1739 if (!pcbddc->current_level) { 1740 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1741 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1742 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1743 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1744 } else { 1745 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1746 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1747 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1748 len -= 15; /* remove "pc_bddc_coarse_" */ 1749 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1750 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1751 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1752 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1753 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1754 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1755 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1756 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1757 } 1758 1759 /* DIRICHLET PROBLEM */ 1760 if (dirichlet) { 1761 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1762 if (pcbddc->local_mat->symmetric_set) { 1763 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1764 } 1765 /* Matrix for Dirichlet problem is pcis->A_II */ 1766 n_D = pcis->n - pcis->n_B; 1767 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1768 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1769 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1770 /* default */ 1771 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1772 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1773 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1774 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1775 if (issbaij) { 1776 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1777 } else { 1778 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1779 } 1780 /* Allow user's customization */ 1781 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1782 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1783 } 1784 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1785 if (sub_schurs->reuse_mumps) { 1786 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1787 1788 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1789 } 1790 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1791 if (!n_D) { 1792 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1793 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1794 } 1795 /* Set Up KSP for Dirichlet problem of BDDC */ 1796 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1797 /* set ksp_D into pcis data */ 1798 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1799 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1800 pcis->ksp_D = pcbddc->ksp_D; 1801 } 1802 1803 /* NEUMANN PROBLEM */ 1804 A_RR = 0; 1805 if (neumann) { 1806 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1807 PetscInt ibs,mbs; 1808 PetscBool issbaij; 1809 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1810 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1811 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1812 if (pcbddc->ksp_R) { /* already created ksp */ 1813 PetscInt nn_R; 1814 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1815 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1816 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1817 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1818 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1819 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1820 reuse = MAT_INITIAL_MATRIX; 1821 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1822 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1823 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1824 reuse = MAT_INITIAL_MATRIX; 1825 } else { /* safe to reuse the matrix */ 1826 reuse = MAT_REUSE_MATRIX; 1827 } 1828 } 1829 /* last check */ 1830 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1831 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1832 reuse = MAT_INITIAL_MATRIX; 1833 } 1834 } else { /* first time, so we need to create the matrix */ 1835 reuse = MAT_INITIAL_MATRIX; 1836 } 1837 /* extract A_RR */ 1838 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1839 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1840 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1841 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1842 if (matis->A == pcbddc->local_mat) { 1843 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1844 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1845 } else { 1846 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1847 } 1848 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1849 if (matis->A == pcbddc->local_mat) { 1850 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1851 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1852 } else { 1853 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1854 } 1855 } 1856 if (!sub_schurs->reuse_mumps) { 1857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1858 if (pcbddc->local_mat->symmetric_set) { 1859 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1860 } 1861 } else { 1862 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1863 1864 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1865 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1866 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1867 } 1868 if (!pcbddc->ksp_R) { /* create object if not present */ 1869 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1870 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1871 /* default */ 1872 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1873 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1874 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1875 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1876 if (issbaij) { 1877 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1878 } else { 1879 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1880 } 1881 /* Allow user's customization */ 1882 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1883 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1884 } 1885 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1886 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1887 if (!n_R) { 1888 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1889 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1890 } 1891 /* Reuse MUMPS solver if it is present */ 1892 if (sub_schurs->reuse_mumps) { 1893 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1894 1895 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1896 } 1897 /* Set Up KSP for Neumann problem of BDDC */ 1898 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1899 } 1900 /* free Neumann problem's matrix */ 1901 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1902 1903 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1904 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1905 if (pcbddc->dbg_flag) { 1906 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1907 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1908 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1909 } 1910 if (dirichlet) { /* Dirichlet */ 1911 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1912 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1913 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1914 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1915 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1916 /* need to be adapted? */ 1917 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1918 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1919 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1920 /* print info */ 1921 if (pcbddc->dbg_flag) { 1922 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1923 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1924 } 1925 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1926 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1927 } 1928 } 1929 if (neumann) { /* Neumann */ 1930 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1931 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1932 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1933 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1934 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1935 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1936 /* need to be adapted? */ 1937 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1938 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1939 /* print info */ 1940 if (pcbddc->dbg_flag) { 1941 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1942 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1943 } 1944 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1945 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1946 } 1947 } 1948 } 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1954 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1955 { 1956 PetscErrorCode ierr; 1957 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1958 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1959 1960 PetscFunctionBegin; 1961 if (!sub_schurs->reuse_mumps) { 1962 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1963 } 1964 if (!pcbddc->switch_static) { 1965 if (applytranspose && pcbddc->local_auxmat1) { 1966 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1967 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1968 } 1969 if (!sub_schurs->reuse_mumps) { 1970 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1971 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1972 } else { 1973 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1974 1975 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1976 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1977 } 1978 } else { 1979 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1980 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1981 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1982 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1983 if (applytranspose && pcbddc->local_auxmat1) { 1984 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1985 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1986 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1987 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1988 } 1989 } 1990 if (!sub_schurs->reuse_mumps) { 1991 if (applytranspose) { 1992 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1993 } else { 1994 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1995 } 1996 #if defined(PETSC_HAVE_MUMPS) 1997 } else { 1998 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1999 2000 if (applytranspose) { 2001 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2002 } else { 2003 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2004 } 2005 #endif 2006 } 2007 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2008 if (!pcbddc->switch_static) { 2009 if (!sub_schurs->reuse_mumps) { 2010 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2011 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2012 } else { 2013 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2014 2015 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2016 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2017 } 2018 if (!applytranspose && pcbddc->local_auxmat1) { 2019 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2020 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2021 } 2022 } else { 2023 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2024 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2026 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2027 if (!applytranspose && pcbddc->local_auxmat1) { 2028 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2029 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2030 } 2031 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2032 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2033 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2034 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2035 } 2036 PetscFunctionReturn(0); 2037 } 2038 2039 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2040 #undef __FUNCT__ 2041 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2042 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2043 { 2044 PetscErrorCode ierr; 2045 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2046 PC_IS* pcis = (PC_IS*) (pc->data); 2047 const PetscScalar zero = 0.0; 2048 2049 PetscFunctionBegin; 2050 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2051 if (applytranspose) { 2052 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2053 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2054 } else { 2055 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2056 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2057 } 2058 2059 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2060 if (pcbddc->benign_p0_lidx >= 0) { 2061 PetscScalar *array; 2062 2063 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2064 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2065 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2066 } 2067 2068 /* start communications from local primal nodes to rhs of coarse solver */ 2069 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2070 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2071 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 2073 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2074 /* TODO remove null space when doing multilevel */ 2075 if (pcbddc->coarse_ksp) { 2076 Vec rhs,sol; 2077 2078 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2079 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2080 if (applytranspose) { 2081 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2082 } else { 2083 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2084 } 2085 } 2086 2087 /* Local solution on R nodes */ 2088 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2089 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2090 } 2091 2092 /* communications from coarse sol to local primal nodes */ 2093 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2094 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2095 2096 /* Sum contributions from two levels */ 2097 if (applytranspose) { 2098 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2099 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2100 } else { 2101 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2102 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2103 } 2104 /* store p0 */ 2105 if (pcbddc->benign_p0_lidx >= 0) { 2106 PetscScalar *array; 2107 2108 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2109 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2110 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2111 } 2112 PetscFunctionReturn(0); 2113 } 2114 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2117 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2118 { 2119 PetscErrorCode ierr; 2120 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2121 PetscScalar *array; 2122 Vec from,to; 2123 2124 PetscFunctionBegin; 2125 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2126 from = pcbddc->coarse_vec; 2127 to = pcbddc->vec1_P; 2128 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2129 Vec tvec; 2130 2131 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2132 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2133 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2134 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2135 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2136 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2137 } 2138 } else { /* from local to global -> put data in coarse right hand side */ 2139 from = pcbddc->vec1_P; 2140 to = pcbddc->coarse_vec; 2141 } 2142 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 #undef __FUNCT__ 2147 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2148 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2149 { 2150 PetscErrorCode ierr; 2151 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2152 PetscScalar *array; 2153 Vec from,to; 2154 2155 PetscFunctionBegin; 2156 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2157 from = pcbddc->coarse_vec; 2158 to = pcbddc->vec1_P; 2159 } else { /* from local to global -> put data in coarse right hand side */ 2160 from = pcbddc->vec1_P; 2161 to = pcbddc->coarse_vec; 2162 } 2163 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2164 if (smode == SCATTER_FORWARD) { 2165 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2166 Vec tvec; 2167 2168 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2169 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2170 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2171 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2172 } 2173 } else { 2174 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2175 ierr = VecResetArray(from);CHKERRQ(ierr); 2176 } 2177 } 2178 PetscFunctionReturn(0); 2179 } 2180 2181 /* uncomment for testing purposes */ 2182 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2183 #undef __FUNCT__ 2184 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2185 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2186 { 2187 PetscErrorCode ierr; 2188 PC_IS* pcis = (PC_IS*)(pc->data); 2189 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2190 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2191 /* one and zero */ 2192 PetscScalar one=1.0,zero=0.0; 2193 /* space to store constraints and their local indices */ 2194 PetscScalar *constraints_data; 2195 PetscInt *constraints_idxs,*constraints_idxs_B; 2196 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2197 PetscInt *constraints_n; 2198 /* iterators */ 2199 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2200 /* BLAS integers */ 2201 PetscBLASInt lwork,lierr; 2202 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2203 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2204 /* reuse */ 2205 PetscInt olocal_primal_size,olocal_primal_size_cc; 2206 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2207 /* change of basis */ 2208 PetscBool qr_needed; 2209 PetscBT change_basis,qr_needed_idx; 2210 /* auxiliary stuff */ 2211 PetscInt *nnz,*is_indices; 2212 PetscInt ncc; 2213 /* some quantities */ 2214 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2215 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2216 2217 PetscFunctionBegin; 2218 /* Destroy Mat objects computed previously */ 2219 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2220 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2221 /* save info on constraints from previous setup (if any) */ 2222 olocal_primal_size = pcbddc->local_primal_size; 2223 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2224 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2225 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2226 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2227 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2228 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2229 2230 /* print some info */ 2231 if (pcbddc->dbg_flag) { 2232 IS vertices; 2233 PetscInt nv,nedges,nfaces; 2234 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2235 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2236 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2237 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2240 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2241 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2242 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2243 } 2244 2245 if (!pcbddc->adaptive_selection) { 2246 IS ISForVertices,*ISForFaces,*ISForEdges; 2247 MatNullSpace nearnullsp; 2248 const Vec *nearnullvecs; 2249 Vec *localnearnullsp; 2250 PetscScalar *array; 2251 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2252 PetscBool nnsp_has_cnst; 2253 /* LAPACK working arrays for SVD or POD */ 2254 PetscBool skip_lapack,boolforchange; 2255 PetscScalar *work; 2256 PetscReal *singular_vals; 2257 #if defined(PETSC_USE_COMPLEX) 2258 PetscReal *rwork; 2259 #endif 2260 #if defined(PETSC_MISSING_LAPACK_GESVD) 2261 PetscScalar *temp_basis,*correlation_mat; 2262 #else 2263 PetscBLASInt dummy_int=1; 2264 PetscScalar dummy_scalar=1.; 2265 #endif 2266 2267 /* Get index sets for faces, edges and vertices from graph */ 2268 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2269 /* free unneeded index sets */ 2270 if (!pcbddc->use_vertices) { 2271 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2272 } 2273 if (!pcbddc->use_edges) { 2274 for (i=0;i<n_ISForEdges;i++) { 2275 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2276 } 2277 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2278 n_ISForEdges = 0; 2279 } 2280 if (!pcbddc->use_faces) { 2281 for (i=0;i<n_ISForFaces;i++) { 2282 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2283 } 2284 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2285 n_ISForFaces = 0; 2286 } 2287 2288 #if defined(PETSC_USE_DEBUG) 2289 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2290 Also use_change_of_basis should be consistent among processors */ 2291 if (pcbddc->NullSpace) { 2292 PetscBool tbool[2],gbool[2]; 2293 2294 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2295 pcbddc->use_change_of_basis = PETSC_TRUE; 2296 if (!ISForEdges) { 2297 pcbddc->use_change_on_faces = PETSC_TRUE; 2298 } 2299 } 2300 tbool[0] = pcbddc->use_change_of_basis; 2301 tbool[1] = pcbddc->use_change_on_faces; 2302 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2303 pcbddc->use_change_of_basis = gbool[0]; 2304 pcbddc->use_change_on_faces = gbool[1]; 2305 } 2306 #endif 2307 2308 /* check if near null space is attached to global mat */ 2309 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2310 if (nearnullsp) { 2311 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2312 /* remove any stored info */ 2313 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2314 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2315 /* store information for BDDC solver reuse */ 2316 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2317 pcbddc->onearnullspace = nearnullsp; 2318 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2319 for (i=0;i<nnsp_size;i++) { 2320 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2321 } 2322 } else { /* if near null space is not provided BDDC uses constants by default */ 2323 nnsp_size = 0; 2324 nnsp_has_cnst = PETSC_TRUE; 2325 } 2326 /* get max number of constraints on a single cc */ 2327 max_constraints = nnsp_size; 2328 if (nnsp_has_cnst) max_constraints++; 2329 2330 /* 2331 Evaluate maximum storage size needed by the procedure 2332 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2333 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2334 There can be multiple constraints per connected component 2335 */ 2336 n_vertices = 0; 2337 if (ISForVertices) { 2338 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2339 } 2340 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2341 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2342 2343 total_counts = n_ISForFaces+n_ISForEdges; 2344 total_counts *= max_constraints; 2345 total_counts += n_vertices; 2346 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2347 2348 total_counts = 0; 2349 max_size_of_constraint = 0; 2350 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2351 IS used_is; 2352 if (i<n_ISForEdges) { 2353 used_is = ISForEdges[i]; 2354 } else { 2355 used_is = ISForFaces[i-n_ISForEdges]; 2356 } 2357 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2358 total_counts += j; 2359 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2360 } 2361 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2362 2363 /* get local part of global near null space vectors */ 2364 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2365 for (k=0;k<nnsp_size;k++) { 2366 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2367 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2368 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2369 } 2370 2371 /* whether or not to skip lapack calls */ 2372 skip_lapack = PETSC_TRUE; 2373 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2374 2375 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2376 if (!skip_lapack) { 2377 PetscScalar temp_work; 2378 2379 #if defined(PETSC_MISSING_LAPACK_GESVD) 2380 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2381 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2382 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2383 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2384 #if defined(PETSC_USE_COMPLEX) 2385 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2386 #endif 2387 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2388 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2389 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2390 lwork = -1; 2391 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2392 #if !defined(PETSC_USE_COMPLEX) 2393 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2394 #else 2395 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2396 #endif 2397 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2398 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2399 #else /* on missing GESVD */ 2400 /* SVD */ 2401 PetscInt max_n,min_n; 2402 max_n = max_size_of_constraint; 2403 min_n = max_constraints; 2404 if (max_size_of_constraint < max_constraints) { 2405 min_n = max_size_of_constraint; 2406 max_n = max_constraints; 2407 } 2408 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2409 #if defined(PETSC_USE_COMPLEX) 2410 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2411 #endif 2412 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2413 lwork = -1; 2414 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2415 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2416 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2417 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2418 #if !defined(PETSC_USE_COMPLEX) 2419 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2420 #else 2421 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2422 #endif 2423 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2424 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2425 #endif /* on missing GESVD */ 2426 /* Allocate optimal workspace */ 2427 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2428 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2429 } 2430 /* Now we can loop on constraining sets */ 2431 total_counts = 0; 2432 constraints_idxs_ptr[0] = 0; 2433 constraints_data_ptr[0] = 0; 2434 /* vertices */ 2435 if (n_vertices) { 2436 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2437 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2438 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2439 for (i=0;i<n_vertices;i++) { 2440 constraints_n[total_counts] = 1; 2441 constraints_data[total_counts] = 1.0; 2442 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2443 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2444 total_counts++; 2445 } 2446 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2447 PetscBool used_vertex; 2448 for (i=0;i<n_vertices;i++) { 2449 used_vertex = PETSC_FALSE; 2450 k = 0; 2451 while (!used_vertex && k<nnsp_size) { 2452 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2453 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2454 constraints_n[total_counts] = 1; 2455 constraints_idxs[total_counts] = is_indices[i]; 2456 constraints_data[total_counts] = 1.0; 2457 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2458 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2459 total_counts++; 2460 used_vertex = PETSC_TRUE; 2461 } 2462 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2463 k++; 2464 } 2465 } 2466 } 2467 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2468 n_vertices = total_counts; 2469 } 2470 2471 /* edges and faces */ 2472 total_counts_cc = total_counts; 2473 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2474 IS used_is; 2475 PetscBool idxs_copied = PETSC_FALSE; 2476 2477 if (ncc<n_ISForEdges) { 2478 used_is = ISForEdges[ncc]; 2479 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2480 } else { 2481 used_is = ISForFaces[ncc-n_ISForEdges]; 2482 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2483 } 2484 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2485 2486 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2487 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2488 /* change of basis should not be performed on local periodic nodes */ 2489 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2490 if (nnsp_has_cnst) { 2491 PetscScalar quad_value; 2492 2493 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2494 idxs_copied = PETSC_TRUE; 2495 2496 if (!pcbddc->use_nnsp_true) { 2497 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2498 } else { 2499 quad_value = 1.0; 2500 } 2501 for (j=0;j<size_of_constraint;j++) { 2502 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2503 } 2504 temp_constraints++; 2505 total_counts++; 2506 } 2507 for (k=0;k<nnsp_size;k++) { 2508 PetscReal real_value; 2509 PetscScalar *ptr_to_data; 2510 2511 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2512 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2513 for (j=0;j<size_of_constraint;j++) { 2514 ptr_to_data[j] = array[is_indices[j]]; 2515 } 2516 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2517 /* check if array is null on the connected component */ 2518 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2519 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2520 if (real_value > 0.0) { /* keep indices and values */ 2521 temp_constraints++; 2522 total_counts++; 2523 if (!idxs_copied) { 2524 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2525 idxs_copied = PETSC_TRUE; 2526 } 2527 } 2528 } 2529 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2530 valid_constraints = temp_constraints; 2531 if (!pcbddc->use_nnsp_true && temp_constraints) { 2532 if (temp_constraints == 1) { /* just normalize the constraint */ 2533 PetscScalar norm,*ptr_to_data; 2534 2535 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2537 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2538 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2539 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2540 } else { /* perform SVD */ 2541 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2542 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2543 2544 #if defined(PETSC_MISSING_LAPACK_GESVD) 2545 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2546 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2547 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2548 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2549 from that computed using LAPACKgesvd 2550 -> This is due to a different computation of eigenvectors in LAPACKheev 2551 -> The quality of the POD-computed basis will be the same */ 2552 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2553 /* Store upper triangular part of correlation matrix */ 2554 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2555 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2556 for (j=0;j<temp_constraints;j++) { 2557 for (k=0;k<j+1;k++) { 2558 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2559 } 2560 } 2561 /* compute eigenvalues and eigenvectors of correlation matrix */ 2562 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2563 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2564 #if !defined(PETSC_USE_COMPLEX) 2565 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2566 #else 2567 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2568 #endif 2569 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2570 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2571 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2572 j = 0; 2573 while (j < temp_constraints && singular_vals[j] < tol) j++; 2574 total_counts = total_counts-j; 2575 valid_constraints = temp_constraints-j; 2576 /* scale and copy POD basis into used quadrature memory */ 2577 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2578 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2579 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2580 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2581 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2582 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2583 if (j<temp_constraints) { 2584 PetscInt ii; 2585 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2586 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2587 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2588 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2589 for (k=0;k<temp_constraints-j;k++) { 2590 for (ii=0;ii<size_of_constraint;ii++) { 2591 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2592 } 2593 } 2594 } 2595 #else /* on missing GESVD */ 2596 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2597 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2598 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2599 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2600 #if !defined(PETSC_USE_COMPLEX) 2601 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2602 #else 2603 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2604 #endif 2605 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2606 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2607 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2608 k = temp_constraints; 2609 if (k > size_of_constraint) k = size_of_constraint; 2610 j = 0; 2611 while (j < k && singular_vals[k-j-1] < tol) j++; 2612 valid_constraints = k-j; 2613 total_counts = total_counts-temp_constraints+valid_constraints; 2614 #endif /* on missing GESVD */ 2615 } 2616 } 2617 /* update pointers information */ 2618 if (valid_constraints) { 2619 constraints_n[total_counts_cc] = valid_constraints; 2620 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2621 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2622 /* set change_of_basis flag */ 2623 if (boolforchange) { 2624 PetscBTSet(change_basis,total_counts_cc); 2625 } 2626 total_counts_cc++; 2627 } 2628 } 2629 /* free workspace */ 2630 if (!skip_lapack) { 2631 ierr = PetscFree(work);CHKERRQ(ierr); 2632 #if defined(PETSC_USE_COMPLEX) 2633 ierr = PetscFree(rwork);CHKERRQ(ierr); 2634 #endif 2635 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2636 #if defined(PETSC_MISSING_LAPACK_GESVD) 2637 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2638 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2639 #endif 2640 } 2641 for (k=0;k<nnsp_size;k++) { 2642 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2643 } 2644 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2645 /* free index sets of faces, edges and vertices */ 2646 for (i=0;i<n_ISForFaces;i++) { 2647 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2648 } 2649 if (n_ISForFaces) { 2650 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2651 } 2652 for (i=0;i<n_ISForEdges;i++) { 2653 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2654 } 2655 if (n_ISForEdges) { 2656 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2657 } 2658 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2659 } else { 2660 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2661 2662 total_counts = 0; 2663 n_vertices = 0; 2664 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2665 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2666 } 2667 max_constraints = 0; 2668 total_counts_cc = 0; 2669 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2670 total_counts += pcbddc->adaptive_constraints_n[i]; 2671 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2672 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2673 } 2674 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2675 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2676 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2677 constraints_data = pcbddc->adaptive_constraints_data; 2678 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2679 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2680 total_counts_cc = 0; 2681 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2682 if (pcbddc->adaptive_constraints_n[i]) { 2683 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2684 } 2685 } 2686 #if 0 2687 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2688 for (i=0;i<total_counts_cc;i++) { 2689 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2690 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2691 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2692 printf(" %d",constraints_idxs[j]); 2693 } 2694 printf("\n"); 2695 printf("number of cc: %d\n",constraints_n[i]); 2696 } 2697 for (i=0;i<n_vertices;i++) { 2698 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2699 } 2700 for (i=0;i<sub_schurs->n_subs;i++) { 2701 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2702 } 2703 #endif 2704 2705 max_size_of_constraint = 0; 2706 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2707 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2708 /* Change of basis */ 2709 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2710 if (pcbddc->use_change_of_basis) { 2711 for (i=0;i<sub_schurs->n_subs;i++) { 2712 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2713 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2714 } 2715 } 2716 } 2717 } 2718 pcbddc->local_primal_size = total_counts; 2719 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2720 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2721 2722 /* map constraints_idxs in boundary numbering */ 2723 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2724 if (i != constraints_idxs_ptr[total_counts_cc]) { 2725 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2726 } 2727 2728 /* Create constraint matrix */ 2729 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2730 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2731 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2732 2733 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2734 /* determine if a QR strategy is needed for change of basis */ 2735 qr_needed = PETSC_FALSE; 2736 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2737 total_primal_vertices=0; 2738 pcbddc->local_primal_size_cc = 0; 2739 for (i=0;i<total_counts_cc;i++) { 2740 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2741 if (size_of_constraint == 1) { 2742 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2743 pcbddc->local_primal_size_cc += 1; 2744 } else if (PetscBTLookup(change_basis,i)) { 2745 for (k=0;k<constraints_n[i];k++) { 2746 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2747 } 2748 pcbddc->local_primal_size_cc += constraints_n[i]; 2749 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2750 PetscBTSet(qr_needed_idx,i); 2751 qr_needed = PETSC_TRUE; 2752 } 2753 } else { 2754 pcbddc->local_primal_size_cc += 1; 2755 } 2756 } 2757 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2758 pcbddc->n_vertices = total_primal_vertices; 2759 /* permute indices in order to have a sorted set of vertices */ 2760 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2761 2762 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2763 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2764 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2765 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2766 2767 /* nonzero structure of constraint matrix */ 2768 /* and get reference dof for local constraints */ 2769 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2770 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2771 2772 j = total_primal_vertices; 2773 total_counts = total_primal_vertices; 2774 cum = total_primal_vertices; 2775 for (i=n_vertices;i<total_counts_cc;i++) { 2776 if (!PetscBTLookup(change_basis,i)) { 2777 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2778 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2779 cum++; 2780 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2781 for (k=0;k<constraints_n[i];k++) { 2782 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2783 nnz[j+k] = size_of_constraint; 2784 } 2785 j += constraints_n[i]; 2786 } 2787 } 2788 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2789 ierr = PetscFree(nnz);CHKERRQ(ierr); 2790 2791 /* set values in constraint matrix */ 2792 for (i=0;i<total_primal_vertices;i++) { 2793 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2794 } 2795 total_counts = total_primal_vertices; 2796 for (i=n_vertices;i<total_counts_cc;i++) { 2797 if (!PetscBTLookup(change_basis,i)) { 2798 PetscInt *cols; 2799 2800 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2801 cols = constraints_idxs+constraints_idxs_ptr[i]; 2802 for (k=0;k<constraints_n[i];k++) { 2803 PetscInt row = total_counts+k; 2804 PetscScalar *vals; 2805 2806 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2807 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2808 } 2809 total_counts += constraints_n[i]; 2810 } 2811 } 2812 /* assembling */ 2813 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2814 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2815 2816 /* 2817 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2818 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2819 */ 2820 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2821 if (pcbddc->use_change_of_basis) { 2822 /* dual and primal dofs on a single cc */ 2823 PetscInt dual_dofs,primal_dofs; 2824 /* working stuff for GEQRF */ 2825 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2826 PetscBLASInt lqr_work; 2827 /* working stuff for UNGQR */ 2828 PetscScalar *gqr_work,lgqr_work_t; 2829 PetscBLASInt lgqr_work; 2830 /* working stuff for TRTRS */ 2831 PetscScalar *trs_rhs; 2832 PetscBLASInt Blas_NRHS; 2833 /* pointers for values insertion into change of basis matrix */ 2834 PetscInt *start_rows,*start_cols; 2835 PetscScalar *start_vals; 2836 /* working stuff for values insertion */ 2837 PetscBT is_primal; 2838 PetscInt *aux_primal_numbering_B; 2839 /* matrix sizes */ 2840 PetscInt global_size,local_size; 2841 /* temporary change of basis */ 2842 Mat localChangeOfBasisMatrix; 2843 /* extra space for debugging */ 2844 PetscScalar *dbg_work; 2845 2846 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2847 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2848 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2849 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2850 /* nonzeros for local mat */ 2851 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2852 for (i=0;i<pcis->n;i++) nnz[i]=1; 2853 for (i=n_vertices;i<total_counts_cc;i++) { 2854 if (PetscBTLookup(change_basis,i)) { 2855 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2856 if (PetscBTLookup(qr_needed_idx,i)) { 2857 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2858 } else { 2859 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2860 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2861 } 2862 } 2863 } 2864 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2865 ierr = PetscFree(nnz);CHKERRQ(ierr); 2866 /* Set initial identity in the matrix */ 2867 for (i=0;i<pcis->n;i++) { 2868 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2869 } 2870 2871 if (pcbddc->dbg_flag) { 2872 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2873 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2874 } 2875 2876 2877 /* Now we loop on the constraints which need a change of basis */ 2878 /* 2879 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2880 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2881 2882 Basic blocks of change of basis matrix T computed by 2883 2884 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2885 2886 | 1 0 ... 0 s_1/S | 2887 | 0 1 ... 0 s_2/S | 2888 | ... | 2889 | 0 ... 1 s_{n-1}/S | 2890 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2891 2892 with S = \sum_{i=1}^n s_i^2 2893 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2894 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2895 2896 - QR decomposition of constraints otherwise 2897 */ 2898 if (qr_needed) { 2899 /* space to store Q */ 2900 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2901 /* first we issue queries for optimal work */ 2902 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2903 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2904 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2905 lqr_work = -1; 2906 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2907 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2908 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2909 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2910 lgqr_work = -1; 2911 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2912 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2913 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2914 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2915 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2916 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2917 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2918 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2919 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2920 /* array to store scaling factors for reflectors */ 2921 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2922 /* array to store rhs and solution of triangular solver */ 2923 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2924 /* allocating workspace for check */ 2925 if (pcbddc->dbg_flag) { 2926 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2927 } 2928 } 2929 /* array to store whether a node is primal or not */ 2930 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2931 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2932 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2933 if (i != total_primal_vertices) { 2934 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2935 } 2936 for (i=0;i<total_primal_vertices;i++) { 2937 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2938 } 2939 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2940 2941 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2942 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2943 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2944 if (PetscBTLookup(change_basis,total_counts)) { 2945 /* get constraint info */ 2946 primal_dofs = constraints_n[total_counts]; 2947 dual_dofs = size_of_constraint-primal_dofs; 2948 2949 if (pcbddc->dbg_flag) { 2950 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2951 } 2952 2953 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2954 2955 /* copy quadrature constraints for change of basis check */ 2956 if (pcbddc->dbg_flag) { 2957 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2958 } 2959 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2960 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2961 2962 /* compute QR decomposition of constraints */ 2963 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2964 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2965 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2966 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2967 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2968 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2969 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2970 2971 /* explictly compute R^-T */ 2972 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2973 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2974 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2975 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2977 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2978 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2979 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2980 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2981 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2982 2983 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2984 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2985 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2986 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2987 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2988 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2989 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2990 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2991 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2992 2993 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2994 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2995 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2996 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2997 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2998 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2999 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3000 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3001 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3002 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3003 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3004 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3005 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3006 3007 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3008 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3009 /* insert cols for primal dofs */ 3010 for (j=0;j<primal_dofs;j++) { 3011 start_vals = &qr_basis[j*size_of_constraint]; 3012 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3013 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3014 } 3015 /* insert cols for dual dofs */ 3016 for (j=0,k=0;j<dual_dofs;k++) { 3017 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3018 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3019 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3020 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3021 j++; 3022 } 3023 } 3024 3025 /* check change of basis */ 3026 if (pcbddc->dbg_flag) { 3027 PetscInt ii,jj; 3028 PetscBool valid_qr=PETSC_TRUE; 3029 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3030 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3031 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3032 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3033 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3034 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3035 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3036 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3037 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3038 for (jj=0;jj<size_of_constraint;jj++) { 3039 for (ii=0;ii<primal_dofs;ii++) { 3040 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3041 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3042 } 3043 } 3044 if (!valid_qr) { 3045 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3046 for (jj=0;jj<size_of_constraint;jj++) { 3047 for (ii=0;ii<primal_dofs;ii++) { 3048 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3049 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3050 } 3051 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3052 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3053 } 3054 } 3055 } 3056 } else { 3057 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3058 } 3059 } 3060 } else { /* simple transformation block */ 3061 PetscInt row,col; 3062 PetscScalar val,norm; 3063 3064 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3065 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3066 for (j=0;j<size_of_constraint;j++) { 3067 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3068 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3069 if (!PetscBTLookup(is_primal,row_B)) { 3070 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3071 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3072 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3073 } else { 3074 for (k=0;k<size_of_constraint;k++) { 3075 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3076 if (row != col) { 3077 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3078 } else { 3079 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3080 } 3081 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3082 } 3083 } 3084 } 3085 if (pcbddc->dbg_flag) { 3086 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3087 } 3088 } 3089 } else { 3090 if (pcbddc->dbg_flag) { 3091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3092 } 3093 } 3094 } 3095 3096 /* free workspace */ 3097 if (qr_needed) { 3098 if (pcbddc->dbg_flag) { 3099 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3100 } 3101 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3102 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3103 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3104 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3105 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3106 } 3107 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3108 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3110 3111 /* assembling of global change of variable */ 3112 { 3113 Mat tmat; 3114 PetscInt bs; 3115 3116 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3117 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3118 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3119 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3120 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3121 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3122 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3123 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3124 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3125 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3126 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3127 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3128 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3129 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3130 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3131 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3132 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3133 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3134 } 3135 /* check */ 3136 if (pcbddc->dbg_flag) { 3137 PetscReal error; 3138 Vec x,x_change; 3139 3140 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3141 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3142 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3143 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3144 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3145 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3146 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3147 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3148 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3149 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3150 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3151 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3154 ierr = VecDestroy(&x);CHKERRQ(ierr); 3155 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3156 } 3157 3158 /* adapt sub_schurs computed (if any) */ 3159 if (pcbddc->use_deluxe_scaling) { 3160 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3161 if (sub_schurs->S_Ej_all) { 3162 Mat S_new,tmat; 3163 ISLocalToGlobalMapping NtoSall; 3164 IS is_all_N,is_V,is_V_Sall; 3165 const PetscScalar *array; 3166 const PetscInt *idxs_V,*idxs_all; 3167 PetscInt i,n_V; 3168 3169 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3170 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3171 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3172 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3173 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3174 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3175 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3176 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3177 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3178 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3179 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3180 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3181 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3182 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3183 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3184 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3185 for (i=0;i<n_V;i++) { 3186 PetscScalar val; 3187 PetscInt idx; 3188 3189 idx = idxs_V[i]; 3190 val = array[idxs_all[idxs_V[i]]]; 3191 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3192 } 3193 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3194 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3195 sub_schurs->S_Ej_all = S_new; 3196 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3197 if (sub_schurs->sum_S_Ej_all) { 3198 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3199 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3200 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3201 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3202 sub_schurs->sum_S_Ej_all = S_new; 3203 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3204 } 3205 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3206 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3207 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3208 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3209 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3210 } 3211 } 3212 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3213 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3214 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3215 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3216 } 3217 3218 /* set up change of basis context */ 3219 if (pcbddc->ChangeOfBasisMatrix) { 3220 PCBDDCChange_ctx change_ctx; 3221 3222 if (!pcbddc->new_global_mat) { 3223 PetscInt global_size,local_size; 3224 3225 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3226 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3227 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3228 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3229 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3230 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3231 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3232 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3233 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3234 } else { 3235 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3236 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3237 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3238 } 3239 if (!pcbddc->user_ChangeOfBasisMatrix) { 3240 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3241 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3242 } else { 3243 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3244 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3245 } 3246 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3247 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3248 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3249 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3250 } 3251 3252 /* add pressure dof to set of primal nodes for numbering purposes */ 3253 if (pcbddc->benign_p0_lidx >= 0) { 3254 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3255 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3256 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3257 pcbddc->local_primal_size_cc++; 3258 pcbddc->local_primal_size++; 3259 } 3260 3261 /* check if a new primal space has been introduced (also take into account benign trick) */ 3262 pcbddc->new_primal_space_local = PETSC_TRUE; 3263 if (olocal_primal_size == pcbddc->local_primal_size) { 3264 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3265 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3266 if (!pcbddc->new_primal_space_local) { 3267 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3268 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3269 } 3270 } 3271 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3272 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3273 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3274 3275 /* flush dbg viewer */ 3276 if (pcbddc->dbg_flag) { 3277 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3278 } 3279 3280 /* free workspace */ 3281 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3282 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3283 if (!pcbddc->adaptive_selection) { 3284 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3285 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3286 } else { 3287 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3288 pcbddc->adaptive_constraints_idxs_ptr, 3289 pcbddc->adaptive_constraints_data_ptr, 3290 pcbddc->adaptive_constraints_idxs, 3291 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3292 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3293 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3294 } 3295 PetscFunctionReturn(0); 3296 } 3297 3298 #undef __FUNCT__ 3299 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3300 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3301 { 3302 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3303 PC_IS *pcis = (PC_IS*)pc->data; 3304 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3305 PetscInt ierr,i,vertex_size,N; 3306 PetscViewer viewer=pcbddc->dbg_viewer; 3307 3308 PetscFunctionBegin; 3309 /* Reset previously computed graph */ 3310 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3311 /* Init local Graph struct */ 3312 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3313 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3314 3315 /* Check validity of the csr graph passed in by the user */ 3316 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3317 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3318 } 3319 3320 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3321 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3322 PetscInt *xadj,*adjncy; 3323 PetscInt nvtxs; 3324 PetscBool flg_row=PETSC_FALSE; 3325 3326 if (pcbddc->use_local_adj) { 3327 3328 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3329 if (flg_row) { 3330 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3331 pcbddc->computed_rowadj = PETSC_TRUE; 3332 } 3333 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3334 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3335 IS is_dummy; 3336 ISLocalToGlobalMapping l2gmap_dummy; 3337 PetscInt j,sum; 3338 PetscInt *cxadj,*cadjncy; 3339 const PetscInt *idxs; 3340 PCBDDCGraph graph; 3341 PetscBT is_on_boundary; 3342 3343 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3344 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3345 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3346 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3347 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3348 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3349 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3350 if (flg_row) { 3351 graph->xadj = xadj; 3352 graph->adjncy = adjncy; 3353 } 3354 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3355 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3356 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3357 3358 if (pcbddc->dbg_flag) { 3359 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3360 for (i=0;i<graph->ncc;i++) { 3361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3362 } 3363 } 3364 3365 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3366 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3367 for (i=0;i<pcis->n_B;i++) { 3368 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3369 } 3370 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3371 3372 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3373 sum = 0; 3374 for (i=0;i<graph->ncc;i++) { 3375 PetscInt sizecc = 0; 3376 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3377 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3378 sizecc++; 3379 } 3380 } 3381 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3382 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3383 cxadj[graph->queue[j]] = sizecc; 3384 } 3385 } 3386 sum += sizecc*sizecc; 3387 } 3388 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3389 sum = 0; 3390 for (i=0;i<pcis->n;i++) { 3391 PetscInt temp = cxadj[i]; 3392 cxadj[i] = sum; 3393 sum += temp; 3394 } 3395 cxadj[pcis->n] = sum; 3396 for (i=0;i<graph->ncc;i++) { 3397 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3398 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3399 PetscInt k,sizecc = 0; 3400 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3401 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3402 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3403 sizecc++; 3404 } 3405 } 3406 } 3407 } 3408 } 3409 if (sum) { 3410 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3411 } else { 3412 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3413 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3414 } 3415 graph->xadj = 0; 3416 graph->adjncy = 0; 3417 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3418 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3419 } 3420 } 3421 if (pcbddc->dbg_flag) { 3422 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3423 } 3424 3425 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3426 vertex_size = 1; 3427 if (pcbddc->user_provided_isfordofs) { 3428 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3429 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3430 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3431 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3432 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3433 } 3434 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3435 pcbddc->n_ISForDofs = 0; 3436 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3437 } 3438 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3439 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3440 } else { 3441 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3442 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3443 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3444 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3445 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3446 } 3447 } 3448 } 3449 3450 /* Setup of Graph */ 3451 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3452 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3453 } 3454 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3455 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3456 } 3457 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3458 3459 /* Graph's connected components analysis */ 3460 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3461 3462 /* print some info to stdout */ 3463 if (pcbddc->dbg_flag) { 3464 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3465 } 3466 3467 /* mark topography has done */ 3468 pcbddc->recompute_topography = PETSC_FALSE; 3469 PetscFunctionReturn(0); 3470 } 3471 3472 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3473 #undef __FUNCT__ 3474 #define __FUNCT__ "PCBDDCSubsetNumbering" 3475 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3476 { 3477 PetscSF sf; 3478 PetscLayout map; 3479 const PetscInt *idxs; 3480 PetscInt *leaf_data,*root_data,*gidxs; 3481 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3482 PetscInt n_n,nlocals,start,first_index; 3483 PetscMPIInt commsize; 3484 PetscBool first_found; 3485 PetscErrorCode ierr; 3486 3487 PetscFunctionBegin; 3488 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3489 if (subset_mult) { 3490 PetscCheckSameComm(subset,1,subset_mult,2); 3491 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3492 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3493 } 3494 /* create workspace layout for computing global indices of subset */ 3495 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3496 lbounds[0] = lbounds[1] = 0; 3497 for (i=0;i<n;i++) { 3498 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3499 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3500 } 3501 lbounds[0] = -lbounds[0]; 3502 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3503 gbounds[0] = -gbounds[0]; 3504 N = gbounds[1] - gbounds[0] + 1; 3505 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3506 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3507 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3508 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3509 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3510 3511 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3512 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3513 if (subset_mult) { 3514 const PetscInt* idxs_mult; 3515 3516 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3517 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3518 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3519 } else { 3520 for (i=0;i<n;i++) leaf_data[i] = 1; 3521 } 3522 /* local size of new subset */ 3523 n_n = 0; 3524 for (i=0;i<n;i++) n_n += leaf_data[i]; 3525 3526 /* global indexes in layout */ 3527 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3528 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3529 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3530 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3531 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3532 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3533 3534 /* reduce from leaves to roots */ 3535 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3536 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3537 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3538 3539 /* count indexes in local part of layout */ 3540 nlocals = 0; 3541 first_index = -1; 3542 first_found = PETSC_FALSE; 3543 for (i=0;i<Nl;i++) { 3544 if (!first_found && root_data[i]) { 3545 first_found = PETSC_TRUE; 3546 first_index = i; 3547 } 3548 nlocals += root_data[i]; 3549 } 3550 3551 /* cumulative of number of indexes and size of subset without holes */ 3552 #if defined(PETSC_HAVE_MPI_EXSCAN) 3553 start = 0; 3554 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3555 #else 3556 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3557 start = start-nlocals; 3558 #endif 3559 3560 if (N_n) { /* compute total size of new subset if requested */ 3561 *N_n = start + nlocals; 3562 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3563 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3564 } 3565 3566 /* adapt root data with cumulative */ 3567 if (first_found) { 3568 PetscInt old_index; 3569 3570 root_data[first_index] += start; 3571 old_index = first_index; 3572 for (i=first_index+1;i<Nl;i++) { 3573 if (root_data[i]) { 3574 root_data[i] += root_data[old_index]; 3575 old_index = i; 3576 } 3577 } 3578 } 3579 3580 /* from roots to leaves */ 3581 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3582 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3583 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3584 3585 /* create new IS with global indexes without holes */ 3586 if (subset_mult) { 3587 const PetscInt* idxs_mult; 3588 PetscInt cum; 3589 3590 cum = 0; 3591 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3592 for (i=0;i<n;i++) { 3593 PetscInt j; 3594 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3595 } 3596 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3597 } else { 3598 for (i=0;i<n;i++) { 3599 gidxs[i] = leaf_data[i]-1; 3600 } 3601 } 3602 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3603 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3604 PetscFunctionReturn(0); 3605 } 3606 3607 #undef __FUNCT__ 3608 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3609 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3610 { 3611 PetscInt i,j; 3612 PetscScalar *alphas; 3613 PetscErrorCode ierr; 3614 3615 PetscFunctionBegin; 3616 /* this implements stabilized Gram-Schmidt */ 3617 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3618 for (i=0;i<n;i++) { 3619 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3620 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3621 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3622 } 3623 ierr = PetscFree(alphas);CHKERRQ(ierr); 3624 PetscFunctionReturn(0); 3625 } 3626 3627 #undef __FUNCT__ 3628 #define __FUNCT__ "MatISGetSubassemblingPattern" 3629 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3630 { 3631 IS ranks_send_to; 3632 PetscInt n_neighs,*neighs,*n_shared,**shared; 3633 PetscMPIInt size,rank,color; 3634 PetscInt *xadj,*adjncy; 3635 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3636 PetscInt i,local_size,threshold=0; 3637 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3638 PetscSubcomm subcomm; 3639 PetscErrorCode ierr; 3640 3641 PetscFunctionBegin; 3642 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3643 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3644 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3645 3646 /* Get info on mapping */ 3647 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3648 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3649 3650 /* build local CSR graph of subdomains' connectivity */ 3651 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3652 xadj[0] = 0; 3653 xadj[1] = PetscMax(n_neighs-1,0); 3654 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3655 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3656 3657 if (threshold) { 3658 PetscInt xadj_count = 0; 3659 for (i=1;i<n_neighs;i++) { 3660 if (n_shared[i] > threshold) { 3661 adjncy[xadj_count] = neighs[i]; 3662 adjncy_wgt[xadj_count] = n_shared[i]; 3663 xadj_count++; 3664 } 3665 } 3666 xadj[1] = xadj_count; 3667 } else { 3668 if (xadj[1]) { 3669 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3670 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3671 } 3672 } 3673 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3674 if (use_square) { 3675 for (i=0;i<xadj[1];i++) { 3676 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3677 } 3678 } 3679 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3680 3681 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3682 3683 /* 3684 Restrict work on active processes only. 3685 */ 3686 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3687 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3688 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3689 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3690 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3691 if (color) { 3692 ierr = PetscFree(xadj);CHKERRQ(ierr); 3693 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3694 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3695 } else { 3696 Mat subdomain_adj; 3697 IS new_ranks,new_ranks_contig; 3698 MatPartitioning partitioner; 3699 PetscInt prank,rstart=0,rend=0; 3700 PetscInt *is_indices,*oldranks; 3701 PetscBool aggregate; 3702 3703 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3704 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3705 prank = rank; 3706 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3707 /* 3708 for (i=0;i<size;i++) { 3709 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3710 } 3711 */ 3712 for (i=0;i<xadj[1];i++) { 3713 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3714 } 3715 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3716 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3717 if (aggregate) { 3718 PetscInt lrows,row,ncols,*cols; 3719 PetscMPIInt nrank; 3720 PetscScalar *vals; 3721 3722 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3723 lrows = 0; 3724 if (nrank<redprocs) { 3725 lrows = size/redprocs; 3726 if (nrank<size%redprocs) lrows++; 3727 } 3728 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3729 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3730 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3731 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3732 row = nrank; 3733 ncols = xadj[1]-xadj[0]; 3734 cols = adjncy; 3735 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3736 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3737 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3738 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3739 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3740 ierr = PetscFree(xadj);CHKERRQ(ierr); 3741 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3742 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3743 ierr = PetscFree(vals);CHKERRQ(ierr); 3744 } else { 3745 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3746 } 3747 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3748 3749 /* Partition */ 3750 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3751 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3752 if (use_vwgt) { 3753 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3754 v_wgt[0] = local_size; 3755 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3756 } 3757 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3758 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3759 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3760 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3761 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3762 3763 /* renumber new_ranks to avoid "holes" in new set of processors */ 3764 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3765 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3766 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3767 if (!redprocs) { 3768 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3769 } else { 3770 PetscInt idxs[1]; 3771 PetscMPIInt tag; 3772 MPI_Request *reqs; 3773 3774 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3775 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3776 for (i=rstart;i<rend;i++) { 3777 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3778 } 3779 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3780 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3781 ierr = PetscFree(reqs);CHKERRQ(ierr); 3782 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3783 } 3784 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3785 /* clean up */ 3786 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3787 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3788 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3789 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3790 } 3791 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3792 3793 /* assemble parallel IS for sends */ 3794 i = 1; 3795 if (color) i=0; 3796 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3797 /* get back IS */ 3798 *is_sends = ranks_send_to; 3799 PetscFunctionReturn(0); 3800 } 3801 3802 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3803 3804 #undef __FUNCT__ 3805 #define __FUNCT__ "MatISSubassemble" 3806 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3807 { 3808 Mat local_mat; 3809 IS is_sends_internal; 3810 PetscInt rows,cols,new_local_rows; 3811 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3812 PetscBool ismatis,isdense,newisdense,destroy_mat; 3813 ISLocalToGlobalMapping l2gmap; 3814 PetscInt* l2gmap_indices; 3815 const PetscInt* is_indices; 3816 MatType new_local_type; 3817 /* buffers */ 3818 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3819 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3820 PetscInt *recv_buffer_idxs_local; 3821 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3822 /* MPI */ 3823 MPI_Comm comm,comm_n; 3824 PetscSubcomm subcomm; 3825 PetscMPIInt n_sends,n_recvs,commsize; 3826 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3827 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3828 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3829 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3830 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3831 PetscErrorCode ierr; 3832 3833 PetscFunctionBegin; 3834 /* TODO: add missing checks */ 3835 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3836 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3837 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3838 PetscValidLogicalCollectiveInt(mat,nis,7); 3839 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3840 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3841 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3842 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3843 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3844 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3845 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3846 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3847 PetscInt mrows,mcols,mnrows,mncols; 3848 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3849 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3850 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3851 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3852 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3853 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3854 } 3855 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3856 PetscValidLogicalCollectiveInt(mat,bs,0); 3857 /* prepare IS for sending if not provided */ 3858 if (!is_sends) { 3859 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3860 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3861 } else { 3862 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3863 is_sends_internal = is_sends; 3864 } 3865 3866 /* get comm */ 3867 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3868 3869 /* compute number of sends */ 3870 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3871 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3872 3873 /* compute number of receives */ 3874 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3875 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3876 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3877 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3878 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3879 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3880 ierr = PetscFree(iflags);CHKERRQ(ierr); 3881 3882 /* restrict comm if requested */ 3883 subcomm = 0; 3884 destroy_mat = PETSC_FALSE; 3885 if (restrict_comm) { 3886 PetscMPIInt color,subcommsize; 3887 3888 color = 0; 3889 if (restrict_full) { 3890 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3891 } else { 3892 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3893 } 3894 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3895 subcommsize = commsize - subcommsize; 3896 /* check if reuse has been requested */ 3897 if (reuse == MAT_REUSE_MATRIX) { 3898 if (*mat_n) { 3899 PetscMPIInt subcommsize2; 3900 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3901 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3902 comm_n = PetscObjectComm((PetscObject)*mat_n); 3903 } else { 3904 comm_n = PETSC_COMM_SELF; 3905 } 3906 } else { /* MAT_INITIAL_MATRIX */ 3907 PetscMPIInt rank; 3908 3909 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3910 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3911 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3912 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3913 comm_n = PetscSubcommChild(subcomm); 3914 } 3915 /* flag to destroy *mat_n if not significative */ 3916 if (color) destroy_mat = PETSC_TRUE; 3917 } else { 3918 comm_n = comm; 3919 } 3920 3921 /* prepare send/receive buffers */ 3922 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3923 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3924 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3925 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3926 if (nis) { 3927 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3928 } 3929 3930 /* Get data from local matrices */ 3931 if (!isdense) { 3932 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3933 /* TODO: See below some guidelines on how to prepare the local buffers */ 3934 /* 3935 send_buffer_vals should contain the raw values of the local matrix 3936 send_buffer_idxs should contain: 3937 - MatType_PRIVATE type 3938 - PetscInt size_of_l2gmap 3939 - PetscInt global_row_indices[size_of_l2gmap] 3940 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3941 */ 3942 } else { 3943 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3944 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3945 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3946 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3947 send_buffer_idxs[1] = i; 3948 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3949 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3950 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3951 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3952 for (i=0;i<n_sends;i++) { 3953 ilengths_vals[is_indices[i]] = len*len; 3954 ilengths_idxs[is_indices[i]] = len+2; 3955 } 3956 } 3957 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3958 /* additional is (if any) */ 3959 if (nis) { 3960 PetscMPIInt psum; 3961 PetscInt j; 3962 for (j=0,psum=0;j<nis;j++) { 3963 PetscInt plen; 3964 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3965 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3966 psum += len+1; /* indices + lenght */ 3967 } 3968 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3969 for (j=0,psum=0;j<nis;j++) { 3970 PetscInt plen; 3971 const PetscInt *is_array_idxs; 3972 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3973 send_buffer_idxs_is[psum] = plen; 3974 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3975 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3976 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3977 psum += plen+1; /* indices + lenght */ 3978 } 3979 for (i=0;i<n_sends;i++) { 3980 ilengths_idxs_is[is_indices[i]] = psum; 3981 } 3982 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3983 } 3984 3985 buf_size_idxs = 0; 3986 buf_size_vals = 0; 3987 buf_size_idxs_is = 0; 3988 for (i=0;i<n_recvs;i++) { 3989 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3990 buf_size_vals += (PetscInt)olengths_vals[i]; 3991 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3992 } 3993 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3994 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3995 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3996 3997 /* get new tags for clean communications */ 3998 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3999 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4000 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4001 4002 /* allocate for requests */ 4003 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4004 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4005 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4006 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4007 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4008 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4009 4010 /* communications */ 4011 ptr_idxs = recv_buffer_idxs; 4012 ptr_vals = recv_buffer_vals; 4013 ptr_idxs_is = recv_buffer_idxs_is; 4014 for (i=0;i<n_recvs;i++) { 4015 source_dest = onodes[i]; 4016 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4017 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4018 ptr_idxs += olengths_idxs[i]; 4019 ptr_vals += olengths_vals[i]; 4020 if (nis) { 4021 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4022 ptr_idxs_is += olengths_idxs_is[i]; 4023 } 4024 } 4025 for (i=0;i<n_sends;i++) { 4026 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4027 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4028 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4029 if (nis) { 4030 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4031 } 4032 } 4033 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4034 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4035 4036 /* assemble new l2g map */ 4037 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4038 ptr_idxs = recv_buffer_idxs; 4039 new_local_rows = 0; 4040 for (i=0;i<n_recvs;i++) { 4041 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4042 ptr_idxs += olengths_idxs[i]; 4043 } 4044 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4045 ptr_idxs = recv_buffer_idxs; 4046 new_local_rows = 0; 4047 for (i=0;i<n_recvs;i++) { 4048 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4049 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4050 ptr_idxs += olengths_idxs[i]; 4051 } 4052 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4053 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4054 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4055 4056 /* infer new local matrix type from received local matrices type */ 4057 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4058 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4059 if (n_recvs) { 4060 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4061 ptr_idxs = recv_buffer_idxs; 4062 for (i=0;i<n_recvs;i++) { 4063 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4064 new_local_type_private = MATAIJ_PRIVATE; 4065 break; 4066 } 4067 ptr_idxs += olengths_idxs[i]; 4068 } 4069 switch (new_local_type_private) { 4070 case MATDENSE_PRIVATE: 4071 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4072 new_local_type = MATSEQAIJ; 4073 bs = 1; 4074 } else { /* if I receive only 1 dense matrix */ 4075 new_local_type = MATSEQDENSE; 4076 bs = 1; 4077 } 4078 break; 4079 case MATAIJ_PRIVATE: 4080 new_local_type = MATSEQAIJ; 4081 bs = 1; 4082 break; 4083 case MATBAIJ_PRIVATE: 4084 new_local_type = MATSEQBAIJ; 4085 break; 4086 case MATSBAIJ_PRIVATE: 4087 new_local_type = MATSEQSBAIJ; 4088 break; 4089 default: 4090 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4091 break; 4092 } 4093 } else { /* by default, new_local_type is seqdense */ 4094 new_local_type = MATSEQDENSE; 4095 bs = 1; 4096 } 4097 4098 /* create MATIS object if needed */ 4099 if (reuse == MAT_INITIAL_MATRIX) { 4100 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4101 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4102 } else { 4103 /* it also destroys the local matrices */ 4104 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4105 } 4106 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4107 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4108 4109 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4110 4111 /* Global to local map of received indices */ 4112 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4113 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4114 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4115 4116 /* restore attributes -> type of incoming data and its size */ 4117 buf_size_idxs = 0; 4118 for (i=0;i<n_recvs;i++) { 4119 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4120 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4121 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4122 } 4123 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4124 4125 /* set preallocation */ 4126 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4127 if (!newisdense) { 4128 PetscInt *new_local_nnz=0; 4129 4130 ptr_vals = recv_buffer_vals; 4131 ptr_idxs = recv_buffer_idxs_local; 4132 if (n_recvs) { 4133 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4134 } 4135 for (i=0;i<n_recvs;i++) { 4136 PetscInt j; 4137 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4138 for (j=0;j<*(ptr_idxs+1);j++) { 4139 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4140 } 4141 } else { 4142 /* TODO */ 4143 } 4144 ptr_idxs += olengths_idxs[i]; 4145 } 4146 if (new_local_nnz) { 4147 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4148 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4149 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4150 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4151 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4152 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4153 } else { 4154 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4155 } 4156 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4157 } else { 4158 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4159 } 4160 4161 /* set values */ 4162 ptr_vals = recv_buffer_vals; 4163 ptr_idxs = recv_buffer_idxs_local; 4164 for (i=0;i<n_recvs;i++) { 4165 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4166 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4167 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4168 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4169 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4170 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4171 } else { 4172 /* TODO */ 4173 } 4174 ptr_idxs += olengths_idxs[i]; 4175 ptr_vals += olengths_vals[i]; 4176 } 4177 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4178 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4179 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4180 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4181 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4182 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4183 4184 #if 0 4185 if (!restrict_comm) { /* check */ 4186 Vec lvec,rvec; 4187 PetscReal infty_error; 4188 4189 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4190 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4191 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4192 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4193 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4194 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4195 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4196 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4197 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4198 } 4199 #endif 4200 4201 /* assemble new additional is (if any) */ 4202 if (nis) { 4203 PetscInt **temp_idxs,*count_is,j,psum; 4204 4205 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4206 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4207 ptr_idxs = recv_buffer_idxs_is; 4208 psum = 0; 4209 for (i=0;i<n_recvs;i++) { 4210 for (j=0;j<nis;j++) { 4211 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4212 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4213 psum += plen; 4214 ptr_idxs += plen+1; /* shift pointer to received data */ 4215 } 4216 } 4217 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4218 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4219 for (i=1;i<nis;i++) { 4220 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4221 } 4222 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4223 ptr_idxs = recv_buffer_idxs_is; 4224 for (i=0;i<n_recvs;i++) { 4225 for (j=0;j<nis;j++) { 4226 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4227 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4228 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4229 ptr_idxs += plen+1; /* shift pointer to received data */ 4230 } 4231 } 4232 for (i=0;i<nis;i++) { 4233 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4234 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4235 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4236 } 4237 ierr = PetscFree(count_is);CHKERRQ(ierr); 4238 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4239 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4240 } 4241 /* free workspace */ 4242 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4243 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4244 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4245 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4246 if (isdense) { 4247 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4248 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4249 } else { 4250 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4251 } 4252 if (nis) { 4253 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4254 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4255 } 4256 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4257 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4258 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4259 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4260 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4261 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4262 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4263 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4264 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4265 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4266 ierr = PetscFree(onodes);CHKERRQ(ierr); 4267 if (nis) { 4268 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4269 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4270 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4271 } 4272 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4273 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4274 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4275 for (i=0;i<nis;i++) { 4276 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4277 } 4278 *mat_n = NULL; 4279 } 4280 PetscFunctionReturn(0); 4281 } 4282 4283 /* temporary hack into ksp private data structure */ 4284 #include <petsc/private/kspimpl.h> 4285 4286 #undef __FUNCT__ 4287 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4288 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4289 { 4290 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4291 PC_IS *pcis = (PC_IS*)pc->data; 4292 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4293 MatNullSpace CoarseNullSpace=NULL; 4294 ISLocalToGlobalMapping coarse_islg; 4295 IS coarse_is,*isarray; 4296 PetscInt i,im_active=-1,active_procs=-1; 4297 PetscInt nis,nisdofs,nisneu; 4298 PC pc_temp; 4299 PCType coarse_pc_type; 4300 KSPType coarse_ksp_type; 4301 PetscBool multilevel_requested,multilevel_allowed; 4302 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4303 Mat t_coarse_mat_is; 4304 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4305 PetscMPIInt all_procs; 4306 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4307 PetscBool compute_vecs = PETSC_FALSE; 4308 PetscScalar *array; 4309 PetscErrorCode ierr; 4310 4311 PetscFunctionBegin; 4312 /* Assign global numbering to coarse dofs */ 4313 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4314 PetscInt ocoarse_size; 4315 compute_vecs = PETSC_TRUE; 4316 ocoarse_size = pcbddc->coarse_size; 4317 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4318 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4319 /* see if we can avoid some work */ 4320 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4321 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4322 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4323 PC pc; 4324 PetscBool isbddc; 4325 4326 /* temporary workaround since PCBDDC does not have a reset method so far */ 4327 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4328 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4329 if (isbddc) { 4330 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4331 } 4332 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4333 coarse_reuse = PETSC_FALSE; 4334 } else { /* we can safely reuse already computed coarse matrix */ 4335 coarse_reuse = PETSC_TRUE; 4336 } 4337 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4338 coarse_reuse = PETSC_FALSE; 4339 } 4340 /* reset any subassembling information */ 4341 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4342 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4343 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4344 coarse_reuse = PETSC_TRUE; 4345 } 4346 4347 /* count "active" (i.e. with positive local size) and "void" processes */ 4348 im_active = !!(pcis->n); 4349 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4350 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4351 void_procs = all_procs-active_procs; 4352 csin_type_simple = PETSC_TRUE; 4353 redist = PETSC_FALSE; 4354 if (pcbddc->current_level && void_procs) { 4355 csin_ml = PETSC_TRUE; 4356 ncoarse_ml = void_procs; 4357 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4358 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4359 csin_ds = PETSC_TRUE; 4360 ncoarse_ds = pcbddc->redistribute_coarse; 4361 redist = PETSC_TRUE; 4362 } else { 4363 csin_ds = PETSC_TRUE; 4364 ncoarse_ds = active_procs; 4365 redist = PETSC_TRUE; 4366 } 4367 } else { 4368 csin_ml = PETSC_FALSE; 4369 ncoarse_ml = all_procs; 4370 if (void_procs) { 4371 csin_ds = PETSC_TRUE; 4372 ncoarse_ds = void_procs; 4373 csin_type_simple = PETSC_FALSE; 4374 } else { 4375 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4376 csin_ds = PETSC_TRUE; 4377 ncoarse_ds = pcbddc->redistribute_coarse; 4378 redist = PETSC_TRUE; 4379 } else { 4380 csin_ds = PETSC_FALSE; 4381 ncoarse_ds = all_procs; 4382 } 4383 } 4384 } 4385 4386 /* 4387 test if we can go multilevel: three conditions must be satisfied: 4388 - we have not exceeded the number of levels requested 4389 - we can actually subassemble the active processes 4390 - we can find a suitable number of MPI processes where we can place the subassembled problem 4391 */ 4392 multilevel_allowed = PETSC_FALSE; 4393 multilevel_requested = PETSC_FALSE; 4394 if (pcbddc->current_level < pcbddc->max_levels) { 4395 multilevel_requested = PETSC_TRUE; 4396 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4397 multilevel_allowed = PETSC_FALSE; 4398 } else { 4399 multilevel_allowed = PETSC_TRUE; 4400 } 4401 } 4402 /* determine number of process partecipating to coarse solver */ 4403 if (multilevel_allowed) { 4404 ncoarse = ncoarse_ml; 4405 csin = csin_ml; 4406 redist = PETSC_FALSE; 4407 } else { 4408 ncoarse = ncoarse_ds; 4409 csin = csin_ds; 4410 } 4411 4412 /* creates temporary l2gmap and IS for coarse indexes */ 4413 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4414 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4415 4416 /* creates temporary MATIS object for coarse matrix */ 4417 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4418 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4419 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4420 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4421 #if 0 4422 { 4423 PetscViewer viewer; 4424 char filename[256]; 4425 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4426 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4427 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4428 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4429 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4430 } 4431 #endif 4432 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4433 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4434 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4435 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4436 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4437 4438 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4439 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4440 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4441 const PetscInt *idxs; 4442 ISLocalToGlobalMapping tmap; 4443 4444 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4445 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4446 /* allocate space for temporary storage */ 4447 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4448 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4449 /* allocate for IS array */ 4450 nisdofs = pcbddc->n_ISForDofsLocal; 4451 nisneu = !!pcbddc->NeumannBoundariesLocal; 4452 nis = nisdofs + nisneu; 4453 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4454 /* dofs splitting */ 4455 for (i=0;i<nisdofs;i++) { 4456 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4457 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4458 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4459 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4460 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4461 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4462 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4463 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4464 } 4465 /* neumann boundaries */ 4466 if (pcbddc->NeumannBoundariesLocal) { 4467 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4468 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4469 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4470 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4471 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4472 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4473 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4474 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4475 } 4476 /* free memory */ 4477 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4478 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4479 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4480 } else { 4481 nis = 0; 4482 nisdofs = 0; 4483 nisneu = 0; 4484 isarray = NULL; 4485 } 4486 /* destroy no longer needed map */ 4487 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4488 4489 /* restrict on coarse candidates (if needed) */ 4490 coarse_mat_is = NULL; 4491 if (csin) { 4492 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4493 if (redist) { 4494 PetscMPIInt rank; 4495 PetscInt spc,n_spc_p1,dest[1],destsize; 4496 4497 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4498 spc = active_procs/ncoarse; 4499 n_spc_p1 = active_procs%ncoarse; 4500 if (im_active) { 4501 destsize = 1; 4502 if (rank > n_spc_p1*(spc+1)-1) { 4503 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4504 } else { 4505 dest[0] = rank/(spc+1); 4506 } 4507 } else { 4508 destsize = 0; 4509 } 4510 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4511 } else if (csin_type_simple) { 4512 PetscMPIInt rank; 4513 PetscInt issize,isidx; 4514 4515 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4516 if (im_active) { 4517 issize = 1; 4518 isidx = (PetscInt)rank; 4519 } else { 4520 issize = 0; 4521 isidx = -1; 4522 } 4523 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4524 } else { /* get a suitable subassembling pattern from MATIS code */ 4525 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4526 } 4527 4528 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4529 if (!redist || ncoarse <= void_procs) { 4530 PetscInt ncoarse_cand,tissize,*nisindices; 4531 PetscInt *coarse_candidates; 4532 const PetscInt* tisindices; 4533 4534 /* get coarse candidates' ranks in pc communicator */ 4535 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4536 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4537 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4538 if (!coarse_candidates[i]) { 4539 coarse_candidates[ncoarse_cand++]=i; 4540 } 4541 } 4542 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4543 4544 4545 if (pcbddc->dbg_flag) { 4546 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4547 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4548 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4550 for (i=0;i<ncoarse_cand;i++) { 4551 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4552 } 4553 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4554 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4555 } 4556 /* shift the pattern on coarse candidates */ 4557 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4558 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4559 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4560 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4561 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4562 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4563 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4564 } 4565 if (pcbddc->dbg_flag) { 4566 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4567 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4568 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4569 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4570 } 4571 } 4572 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4573 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4574 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4575 } else { /* this is the last level, so use just receiving processes in subcomm */ 4576 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4577 } 4578 } else { 4579 if (pcbddc->dbg_flag) { 4580 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4583 } 4584 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4585 coarse_mat_is = t_coarse_mat_is; 4586 } 4587 4588 /* create local to global scatters for coarse problem */ 4589 if (compute_vecs) { 4590 PetscInt lrows; 4591 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4592 if (coarse_mat_is) { 4593 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4594 } else { 4595 lrows = 0; 4596 } 4597 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4598 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4599 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4600 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4601 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4602 } 4603 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4604 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4605 4606 /* set defaults for coarse KSP and PC */ 4607 if (multilevel_allowed) { 4608 coarse_ksp_type = KSPRICHARDSON; 4609 coarse_pc_type = PCBDDC; 4610 } else { 4611 coarse_ksp_type = KSPPREONLY; 4612 coarse_pc_type = PCREDUNDANT; 4613 } 4614 4615 /* print some info if requested */ 4616 if (pcbddc->dbg_flag) { 4617 if (!multilevel_allowed) { 4618 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4619 if (multilevel_requested) { 4620 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4621 } else if (pcbddc->max_levels) { 4622 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4623 } 4624 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4625 } 4626 } 4627 4628 /* create the coarse KSP object only once with defaults */ 4629 if (coarse_mat_is) { 4630 MatReuse coarse_mat_reuse; 4631 PetscViewer dbg_viewer = NULL; 4632 if (pcbddc->dbg_flag) { 4633 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4634 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4635 } 4636 if (!pcbddc->coarse_ksp) { 4637 char prefix[256],str_level[16]; 4638 size_t len; 4639 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4640 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4641 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4642 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4643 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4644 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4645 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4646 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4647 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4648 /* prefix */ 4649 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4650 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4651 if (!pcbddc->current_level) { 4652 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4653 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4654 } else { 4655 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4656 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4657 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4658 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4659 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4660 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4661 } 4662 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4663 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4664 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4665 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4666 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4667 /* allow user customization */ 4668 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4669 } 4670 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4671 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4672 if (nisdofs) { 4673 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4674 for (i=0;i<nisdofs;i++) { 4675 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4676 } 4677 } 4678 if (nisneu) { 4679 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4680 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4681 } 4682 4683 /* get some info after set from options */ 4684 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4685 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4686 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4687 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4688 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4689 isbddc = PETSC_FALSE; 4690 } 4691 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4692 if (isredundant) { 4693 KSP inner_ksp; 4694 PC inner_pc; 4695 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4696 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4697 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4698 } 4699 4700 /* assemble coarse matrix */ 4701 if (coarse_reuse) { 4702 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4703 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4704 coarse_mat_reuse = MAT_REUSE_MATRIX; 4705 } else { 4706 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4707 } 4708 if (isbddc || isnn) { 4709 if (pcbddc->coarsening_ratio > 1) { 4710 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4711 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4712 if (pcbddc->dbg_flag) { 4713 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4714 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4715 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4716 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4717 } 4718 } 4719 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4720 } else { 4721 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4722 coarse_mat = coarse_mat_is; 4723 } 4724 } else { 4725 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4726 } 4727 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4728 4729 /* propagate symmetry info of coarse matrix */ 4730 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4731 if (pc->pmat->symmetric_set) { 4732 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4733 } 4734 if (pc->pmat->hermitian_set) { 4735 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4736 } 4737 if (pc->pmat->spd_set) { 4738 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4739 } 4740 /* set operators */ 4741 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4742 if (pcbddc->dbg_flag) { 4743 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4744 } 4745 } else { /* processes non partecipating to coarse solver (if any) */ 4746 coarse_mat = 0; 4747 } 4748 ierr = PetscFree(isarray);CHKERRQ(ierr); 4749 #if 0 4750 { 4751 PetscViewer viewer; 4752 char filename[256]; 4753 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 4754 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 4755 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4756 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4757 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4758 } 4759 #endif 4760 4761 /* Compute coarse null space (special handling by BDDC only) */ 4762 #if 0 4763 if (pcbddc->NullSpace) { 4764 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4765 } 4766 #endif 4767 4768 if (pcbddc->coarse_ksp) { 4769 Vec crhs,csol; 4770 PetscBool ispreonly; 4771 4772 if (CoarseNullSpace) { 4773 if (isbddc) { 4774 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4775 } else { 4776 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4777 } 4778 } 4779 /* setup coarse ksp */ 4780 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4781 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4782 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4783 /* hack */ 4784 if (!csol) { 4785 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4786 } 4787 if (!crhs) { 4788 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4789 } 4790 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4791 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4792 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4793 KSP check_ksp; 4794 KSPType check_ksp_type; 4795 PC check_pc; 4796 Vec check_vec,coarse_vec; 4797 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4798 PetscInt its; 4799 PetscBool compute_eigs; 4800 PetscReal *eigs_r,*eigs_c; 4801 PetscInt neigs; 4802 const char *prefix; 4803 4804 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4805 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4806 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4807 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4808 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4809 if (ispreonly) { 4810 check_ksp_type = KSPPREONLY; 4811 compute_eigs = PETSC_FALSE; 4812 } else { 4813 check_ksp_type = KSPGMRES; 4814 compute_eigs = PETSC_TRUE; 4815 } 4816 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4817 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4818 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4819 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4820 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4821 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4822 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4823 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4824 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4825 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4826 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4827 /* create random vec */ 4828 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4829 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4830 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4831 if (CoarseNullSpace) { 4832 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4833 } 4834 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4835 /* solve coarse problem */ 4836 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4837 if (CoarseNullSpace) { 4838 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4839 } 4840 /* set eigenvalue estimation if preonly has not been requested */ 4841 if (compute_eigs) { 4842 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4843 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4844 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4845 lambda_max = eigs_r[neigs-1]; 4846 lambda_min = eigs_r[0]; 4847 if (pcbddc->use_coarse_estimates) { 4848 if (lambda_max>lambda_min) { 4849 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4850 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4851 } 4852 } 4853 } 4854 4855 /* check coarse problem residual error */ 4856 if (pcbddc->dbg_flag) { 4857 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4858 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4859 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4860 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4861 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4862 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4863 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4864 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4865 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4866 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4867 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4868 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4869 if (compute_eigs) { 4870 PetscReal lambda_max_s,lambda_min_s; 4871 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4872 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4873 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4874 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4875 for (i=0;i<neigs;i++) { 4876 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4877 } 4878 } 4879 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4880 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4881 } 4882 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4883 if (compute_eigs) { 4884 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4885 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4886 } 4887 } 4888 } 4889 /* print additional info */ 4890 if (pcbddc->dbg_flag) { 4891 /* waits until all processes reaches this point */ 4892 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4893 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4894 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4895 } 4896 4897 /* free memory */ 4898 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4899 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4900 PetscFunctionReturn(0); 4901 } 4902 4903 #undef __FUNCT__ 4904 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4905 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4906 { 4907 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4908 PC_IS* pcis = (PC_IS*)pc->data; 4909 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4910 IS subset,subset_mult,subset_n; 4911 PetscInt local_size,coarse_size=0; 4912 PetscInt *local_primal_indices=NULL; 4913 const PetscInt *t_local_primal_indices; 4914 PetscErrorCode ierr; 4915 4916 PetscFunctionBegin; 4917 /* Compute global number of coarse dofs */ 4918 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4919 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4920 } 4921 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4922 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4923 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4924 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4925 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4926 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4927 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4928 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4929 if (local_size != pcbddc->local_primal_size) { 4930 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4931 } 4932 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4933 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4934 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4935 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4936 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4937 4938 /* check numbering */ 4939 if (pcbddc->dbg_flag) { 4940 PetscScalar coarsesum,*array,*array2; 4941 PetscInt i; 4942 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4943 4944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4945 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4946 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4947 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4948 /* counter */ 4949 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4950 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4951 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4952 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4953 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4954 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4955 4956 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4957 for (i=0;i<pcbddc->local_primal_size;i++) { 4958 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4959 } 4960 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4961 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4962 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4963 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4964 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4965 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4966 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4967 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4968 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4969 for (i=0;i<pcis->n;i++) { 4970 if (array[i] != 0.0 && array[i] != array2[i]) { 4971 PetscInt owned = (PetscInt)(array[i]); 4972 PetscInt neigh = (PetscInt)(array2[i]); 4973 set_error = PETSC_TRUE; 4974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4975 } 4976 } 4977 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4978 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4979 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4980 for (i=0;i<pcis->n;i++) { 4981 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4982 } 4983 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4984 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4985 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4986 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4987 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4988 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4989 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4990 PetscInt *gidxs; 4991 4992 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4993 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4994 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4995 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4997 for (i=0;i<pcbddc->local_primal_size;i++) { 4998 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4999 } 5000 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5001 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5002 } 5003 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5004 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5005 } 5006 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5007 /* get back data */ 5008 *coarse_size_n = coarse_size; 5009 *local_primal_indices_n = local_primal_indices; 5010 PetscFunctionReturn(0); 5011 } 5012 5013 #undef __FUNCT__ 5014 #define __FUNCT__ "PCBDDCGlobalToLocal" 5015 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5016 { 5017 IS localis_t; 5018 PetscInt i,lsize,*idxs,n; 5019 PetscScalar *vals; 5020 PetscErrorCode ierr; 5021 5022 PetscFunctionBegin; 5023 /* get indices in local ordering exploiting local to global map */ 5024 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5025 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5026 for (i=0;i<lsize;i++) vals[i] = 1.0; 5027 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5028 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5029 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5030 if (idxs) { /* multilevel guard */ 5031 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5032 } 5033 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5034 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5035 ierr = PetscFree(vals);CHKERRQ(ierr); 5036 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5037 /* now compute set in local ordering */ 5038 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5039 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5040 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5041 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5042 for (i=0,lsize=0;i<n;i++) { 5043 if (PetscRealPart(vals[i]) > 0.5) { 5044 lsize++; 5045 } 5046 } 5047 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5048 for (i=0,lsize=0;i<n;i++) { 5049 if (PetscRealPart(vals[i]) > 0.5) { 5050 idxs[lsize++] = i; 5051 } 5052 } 5053 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5054 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5055 *localis = localis_t; 5056 PetscFunctionReturn(0); 5057 } 5058 5059 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5060 #undef __FUNCT__ 5061 #define __FUNCT__ "PCBDDCMatMult_Private" 5062 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5063 { 5064 PCBDDCChange_ctx change_ctx; 5065 PetscErrorCode ierr; 5066 5067 PetscFunctionBegin; 5068 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5069 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5070 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5071 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5072 PetscFunctionReturn(0); 5073 } 5074 5075 #undef __FUNCT__ 5076 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5077 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5078 { 5079 PCBDDCChange_ctx change_ctx; 5080 PetscErrorCode ierr; 5081 5082 PetscFunctionBegin; 5083 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5084 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5085 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5086 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5087 PetscFunctionReturn(0); 5088 } 5089 5090 #undef __FUNCT__ 5091 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5092 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5093 { 5094 PC_IS *pcis=(PC_IS*)pc->data; 5095 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5096 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5097 Mat S_j; 5098 PetscInt *used_xadj,*used_adjncy; 5099 PetscBool free_used_adj; 5100 PetscErrorCode ierr; 5101 5102 PetscFunctionBegin; 5103 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5104 free_used_adj = PETSC_FALSE; 5105 if (pcbddc->sub_schurs_layers == -1) { 5106 used_xadj = NULL; 5107 used_adjncy = NULL; 5108 } else { 5109 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5110 used_xadj = pcbddc->mat_graph->xadj; 5111 used_adjncy = pcbddc->mat_graph->adjncy; 5112 } else if (pcbddc->computed_rowadj) { 5113 used_xadj = pcbddc->mat_graph->xadj; 5114 used_adjncy = pcbddc->mat_graph->adjncy; 5115 } else { 5116 PetscBool flg_row=PETSC_FALSE; 5117 const PetscInt *xadj,*adjncy; 5118 PetscInt nvtxs; 5119 5120 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5121 if (flg_row) { 5122 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5123 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5124 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5125 free_used_adj = PETSC_TRUE; 5126 } else { 5127 pcbddc->sub_schurs_layers = -1; 5128 used_xadj = NULL; 5129 used_adjncy = NULL; 5130 } 5131 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5132 } 5133 } 5134 5135 /* setup sub_schurs data */ 5136 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5137 if (!sub_schurs->use_mumps) { 5138 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5139 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5140 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5141 } else { 5142 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5143 PetscBool isseqaij; 5144 if (!pcbddc->use_vertices && reuse_solvers) { 5145 PetscInt n_vertices; 5146 5147 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5148 reuse_solvers = (PetscBool)!n_vertices; 5149 } 5150 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5151 if (!isseqaij) { 5152 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5153 if (matis->A == pcbddc->local_mat) { 5154 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5155 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5156 } else { 5157 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5158 } 5159 } 5160 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5161 } 5162 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5163 5164 /* free adjacency */ 5165 if (free_used_adj) { 5166 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5167 } 5168 PetscFunctionReturn(0); 5169 } 5170 5171 #undef __FUNCT__ 5172 #define __FUNCT__ "PCBDDCInitSubSchurs" 5173 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5174 { 5175 PC_IS *pcis=(PC_IS*)pc->data; 5176 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5177 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5178 PCBDDCGraph graph; 5179 PetscErrorCode ierr; 5180 5181 PetscFunctionBegin; 5182 /* attach interface graph for determining subsets */ 5183 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5184 IS verticesIS,verticescomm; 5185 PetscInt vsize,*idxs; 5186 5187 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5188 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5189 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5190 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5191 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5192 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5193 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5194 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5195 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5196 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5197 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5198 /* 5199 if (pcbddc->dbg_flag) { 5200 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5201 } 5202 */ 5203 } else { 5204 graph = pcbddc->mat_graph; 5205 } 5206 5207 /* sub_schurs init */ 5208 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5209 5210 /* free graph struct */ 5211 if (pcbddc->sub_schurs_rebuild) { 5212 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5213 } 5214 PetscFunctionReturn(0); 5215 } 5216