1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0" 10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 if (!pcbddc->benign_sf) { 17 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 18 if (pcbddc->benign_p0_gidx >= 0) { 19 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 20 } else { 21 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 22 } 23 } 24 if (pop) { /* use SF to get values */ 25 PetscScalar *array; 26 27 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 28 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 29 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 30 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 31 } else { /* use VecSetValue */ 32 if (pcbddc->benign_p0_gidx >= 0) { 33 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 34 } 35 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 36 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 43 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 44 { 45 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 46 PetscErrorCode ierr; 47 48 PetscFunctionBegin; 49 /* TODO: add error checking 50 - avoid nested pop (or push) calls. 51 - cannot push before pop. 52 */ 53 if (pcbddc->benign_p0_lidx < 0) { 54 PetscFunctionReturn(0); 55 } 56 if (pop) { 57 const PetscInt *cB0_cols; 58 PetscInt cB0_ncol; 59 const PetscScalar *cB0_vals; 60 61 /* extract B_0 */ 62 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 63 pcbddc->B0_ncol = cB0_ncol; 64 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 65 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 66 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 67 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 68 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 69 /* remove rows and cols from local problem */ 70 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 71 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 72 } else { /* push */ 73 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 74 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 75 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 76 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 77 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 78 } 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCAdaptiveSelection" 84 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 85 { 86 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 87 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 88 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 89 PetscBLASInt *B_iwork,*B_ifail; 90 PetscScalar *work,lwork; 91 PetscScalar *St,*S,*eigv; 92 PetscScalar *Sarray,*Starray; 93 PetscReal *eigs,thresh; 94 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 95 PetscBool allocated_S_St; 96 #if defined(PETSC_USE_COMPLEX) 97 PetscReal *rwork; 98 #endif 99 PetscErrorCode ierr; 100 101 PetscFunctionBegin; 102 if (!sub_schurs->use_mumps) { 103 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 104 } 105 106 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 107 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 108 } 109 110 if (pcbddc->dbg_flag) { 111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 112 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 114 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 115 } 116 117 if (pcbddc->dbg_flag) { 118 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 119 } 120 121 /* max size of subsets */ 122 mss = 0; 123 for (i=0;i<sub_schurs->n_subs;i++) { 124 PetscInt subset_size; 125 126 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 127 mss = PetscMax(mss,subset_size); 128 } 129 130 /* min/max and threshold */ 131 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 132 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 133 nmax = PetscMax(nmin,nmax); 134 allocated_S_St = PETSC_FALSE; 135 if (nmin) { 136 allocated_S_St = PETSC_TRUE; 137 } 138 139 /* allocate lapack workspace */ 140 cum = cum2 = 0; 141 maxneigs = 0; 142 for (i=0;i<sub_schurs->n_subs;i++) { 143 PetscInt n,subset_size; 144 145 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 146 n = PetscMin(subset_size,nmax); 147 cum += subset_size; 148 cum2 += subset_size*n; 149 maxneigs = PetscMax(maxneigs,n); 150 } 151 if (mss) { 152 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 153 PetscBLASInt B_itype = 1; 154 PetscBLASInt B_N = mss; 155 PetscReal zero = 0.0; 156 PetscReal eps = 0.0; /* dlamch? */ 157 158 B_lwork = -1; 159 S = NULL; 160 St = NULL; 161 eigs = NULL; 162 eigv = NULL; 163 B_iwork = NULL; 164 B_ifail = NULL; 165 #if defined(PETSC_USE_COMPLEX) 166 rwork = NULL; 167 #endif 168 thresh = 1.0; 169 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 170 #if defined(PETSC_USE_COMPLEX) 171 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 172 #else 173 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 174 #endif 175 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 176 ierr = PetscFPTrapPop();CHKERRQ(ierr); 177 } else { 178 /* TODO */ 179 } 180 } else { 181 lwork = 0; 182 } 183 184 nv = 0; 185 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 186 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 187 } 188 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 189 if (allocated_S_St) { 190 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 191 } 192 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 193 #if defined(PETSC_USE_COMPLEX) 194 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 195 #endif 196 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 197 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 198 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 199 nv+cum,&pcbddc->adaptive_constraints_idxs, 200 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 201 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 202 203 maxneigs = 0; 204 cum = cum2 = cumarray = 0; 205 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 206 pcbddc->adaptive_constraints_data_ptr[0] = 0; 207 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 208 const PetscInt *idxs; 209 210 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 211 for (cum=0;cum<nv;cum++) { 212 pcbddc->adaptive_constraints_n[cum] = 1; 213 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 214 pcbddc->adaptive_constraints_data[cum] = 1.0; 215 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 216 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 217 } 218 cum2 = cum; 219 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 220 } 221 222 if (mss) { /* multilevel */ 223 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 224 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 225 } 226 227 for (i=0;i<sub_schurs->n_subs;i++) { 228 229 const PetscInt *idxs; 230 PetscReal infty = PETSC_MAX_REAL; 231 PetscInt j,subset_size,eigs_start = 0; 232 PetscBLASInt B_N; 233 PetscBool same_data = PETSC_FALSE; 234 235 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 236 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 237 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 238 if (sub_schurs->is_hermitian) { 239 PetscInt j,k; 240 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 241 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 242 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 } 244 for (j=0;j<subset_size;j++) { 245 for (k=j;k<subset_size;k++) { 246 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 247 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 248 } 249 } 250 } else { 251 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 } 254 } else { 255 S = Sarray + cumarray; 256 St = Starray + cumarray; 257 } 258 259 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 260 /* see if we can save some work */ 261 if (sub_schurs->n_subs == 1) { 262 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 263 } 264 265 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 266 B_neigs = 0; 267 } else { 268 /* Threshold: this is an heuristic for edges */ 269 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 270 271 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 272 PetscBLASInt B_itype = 1; 273 PetscBLASInt B_IL, B_IU; 274 PetscReal eps = -1.0; /* dlamch? */ 275 PetscInt nmin_s; 276 277 /* ask for eigenvalues larger than thresh */ 278 if (pcbddc->dbg_flag) { 279 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 280 } 281 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 282 #if defined(PETSC_USE_COMPLEX) 283 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 284 #else 285 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 286 #endif 287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 288 if (B_ierr) { 289 if (B_ierr < 0 ) { 290 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 291 } else if (B_ierr <= B_N) { 292 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 293 } else { 294 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 295 } 296 } 297 298 if (B_neigs > nmax) { 299 if (pcbddc->dbg_flag) { 300 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 301 } 302 eigs_start = B_neigs -nmax; 303 B_neigs = nmax; 304 } 305 306 nmin_s = PetscMin(nmin,B_N); 307 if (B_neigs < nmin_s) { 308 PetscBLASInt B_neigs2; 309 310 B_IU = B_N - B_neigs; 311 B_IL = B_N - nmin_s + 1; 312 if (pcbddc->dbg_flag) { 313 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 314 } 315 if (sub_schurs->is_hermitian) { 316 PetscInt j; 317 for (j=0;j<subset_size;j++) { 318 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 319 } 320 for (j=0;j<subset_size;j++) { 321 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 322 } 323 } else { 324 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 325 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 326 } 327 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 330 #else 331 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 332 #endif 333 ierr = PetscFPTrapPop();CHKERRQ(ierr); 334 B_neigs += B_neigs2; 335 } 336 if (B_ierr) { 337 if (B_ierr < 0 ) { 338 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 339 } else if (B_ierr <= B_N) { 340 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 341 } else { 342 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 343 } 344 } 345 if (pcbddc->dbg_flag) { 346 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 347 for (j=0;j<B_neigs;j++) { 348 if (eigs[j] == 0.0) { 349 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 352 } 353 } 354 } 355 } else { 356 /* TODO */ 357 } 358 } 359 maxneigs = PetscMax(B_neigs,maxneigs); 360 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 361 if (B_neigs) { 362 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 363 364 if (pcbddc->dbg_flag > 1) { 365 PetscInt ii; 366 for (ii=0;ii<B_neigs;ii++) { 367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 368 for (j=0;j<B_N;j++) { 369 #if defined(PETSC_USE_COMPLEX) 370 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 371 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 372 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 373 #else 374 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 375 #endif 376 } 377 } 378 } 379 #if 0 380 for (j=0;j<B_neigs;j++) { 381 PetscBLASInt Blas_N,Blas_one = 1.0; 382 PetscScalar norm; 383 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 384 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 385 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 386 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 387 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 388 } else { 389 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 390 } 391 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 392 } 393 #endif 394 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 395 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 396 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 397 cum++; 398 } 399 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 400 /* shift for next computation */ 401 cumarray += subset_size*subset_size; 402 } 403 if (pcbddc->dbg_flag) { 404 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 405 } 406 407 if (mss) { 408 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 409 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 410 /* destroy matrices (junk) */ 411 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 412 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 413 } 414 if (allocated_S_St) { 415 ierr = PetscFree2(S,St);CHKERRQ(ierr); 416 } 417 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 418 #if defined(PETSC_USE_COMPLEX) 419 ierr = PetscFree(rwork);CHKERRQ(ierr); 420 #endif 421 if (pcbddc->dbg_flag) { 422 PetscInt maxneigs_r; 423 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 424 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCSetUpSolvers" 431 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 432 { 433 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *coarse_submat_vals; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 /* Setup local scatters R_to_B and (optionally) R_to_D */ 439 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 440 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 441 442 /* Setup local neumann solver ksp_R */ 443 /* PCBDDCSetUpLocalScatters should be called first! */ 444 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 445 446 /* Change global null space passed in by the user if change of basis has been requested */ 447 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 448 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 449 } 450 451 /* 452 Setup local correction and local part of coarse basis. 453 Gives back the dense local part of the coarse matrix in column major ordering 454 */ 455 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 456 457 /* Compute total number of coarse nodes and setup coarse solver */ 458 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 459 460 /* free */ 461 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 #undef __FUNCT__ 466 #define __FUNCT__ "PCBDDCResetCustomization" 467 PetscErrorCode PCBDDCResetCustomization(PC pc) 468 { 469 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 470 PetscErrorCode ierr; 471 472 PetscFunctionBegin; 473 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 474 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 475 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 476 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 477 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 478 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 479 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 480 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 481 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 482 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 483 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "PCBDDCResetTopography" 489 PetscErrorCode PCBDDCResetTopography(PC pc) 490 { 491 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 496 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 497 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 498 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 499 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "PCBDDCResetSolvers" 505 PetscErrorCode PCBDDCResetSolvers(PC pc) 506 { 507 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 508 PetscScalar *array; 509 PetscErrorCode ierr; 510 511 PetscFunctionBegin; 512 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 513 if (pcbddc->coarse_phi_B) { 514 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 515 ierr = PetscFree(array);CHKERRQ(ierr); 516 } 517 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 518 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 519 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 520 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 521 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 523 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 524 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 525 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 526 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 527 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 528 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 529 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 530 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 531 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 532 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 533 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 534 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 535 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 536 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 537 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 538 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 539 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 540 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 541 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 542 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 543 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 544 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 550 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 551 { 552 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 553 PC_IS *pcis = (PC_IS*)pc->data; 554 VecType impVecType; 555 PetscInt n_constraints,n_R,old_size,n_benign; 556 PetscErrorCode ierr; 557 558 PetscFunctionBegin; 559 if (!pcbddc->ConstraintMatrix) { 560 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 561 } 562 /* get sizes */ 563 n_benign = 0; 564 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 565 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 566 if (n_benign && n_constraints) { 567 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis"); 568 } 569 n_R = pcis->n - pcbddc->n_vertices; 570 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 571 /* local work vectors (try to avoid unneeded work)*/ 572 /* R nodes */ 573 old_size = -1; 574 if (pcbddc->vec1_R) { 575 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 576 } 577 if (n_R != old_size) { 578 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 579 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 580 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 581 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 582 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 583 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 584 } 585 /* local primal dofs */ 586 old_size = -1; 587 if (pcbddc->vec1_P) { 588 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 589 } 590 if (pcbddc->local_primal_size != old_size) { 591 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 592 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 593 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 594 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 595 } 596 /* local explicit constraints */ 597 old_size = -1; 598 if (pcbddc->vec1_C) { 599 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 600 } 601 if (n_constraints && n_constraints != old_size) { 602 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 603 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 604 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 605 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 606 } 607 PetscFunctionReturn(0); 608 } 609 610 #undef __FUNCT__ 611 #define __FUNCT__ "PCBDDCSetUpCorrection" 612 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 613 { 614 PetscErrorCode ierr; 615 /* pointers to pcis and pcbddc */ 616 PC_IS* pcis = (PC_IS*)pc->data; 617 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 618 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 619 /* submatrices of local problem */ 620 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 621 /* submatrices of benign trick */ 622 Mat B0_R,B0_V; 623 /* submatrices of local coarse problem */ 624 Mat S_VV,S_CV,S_VC,S_CC; 625 /* working matrices */ 626 Mat C_CR; 627 /* additional working stuff */ 628 PC pc_R; 629 Mat F,B0 = NULL; 630 Vec vec_R_benign,vec_R_benign_2; 631 PetscBool isLU,isCHOL,isILU; 632 633 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 634 PetscScalar *work; 635 PetscInt *idx_V_B; 636 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 637 PetscInt i,n_R,n_D,n_B; 638 PetscBool unsymmetric_check; 639 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 640 MatType impMatType; 641 /* some shortcuts to scalars */ 642 PetscScalar one=1.0,m_one=-1.0; 643 644 PetscFunctionBegin; 645 n_benign = 0; 646 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 647 n_vertices = pcbddc->n_vertices; 648 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 649 /* Set Non-overlapping dimensions */ 650 n_B = pcis->n_B; 651 n_D = pcis->n - n_B; 652 n_R = pcis->n - n_vertices; 653 654 /* Set types for local objects needed by BDDC precondtioner */ 655 impMatType = MATSEQDENSE; 656 657 /* vertices in boundary numbering */ 658 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 659 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 660 if (i != n_vertices) { 661 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 662 } 663 664 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 665 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 666 if (!n_benign) { 667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 668 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 669 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 670 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 671 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 672 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 673 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 674 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 675 } else { /* these wrappers are not needed with the benign trick */ 676 S_VV = S_CV = S_VC = S_CC = NULL; 677 } 678 679 unsymmetric_check = PETSC_FALSE; 680 /* allocate workspace */ 681 n = 0; 682 if (n_constraints) { 683 n += n_R*n_constraints; 684 } 685 if (n_vertices) { 686 n = PetscMax(2*n_R*n_vertices,n); 687 n = PetscMax((n_R+n_B)*n_vertices,n); 688 } 689 if (!pcbddc->symmetric_primal) { 690 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 691 unsymmetric_check = PETSC_TRUE; 692 } 693 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 694 695 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 696 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 697 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 698 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 699 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 700 if (isLU || isILU || isCHOL) { 701 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 702 } else if (sub_schurs->reuse_mumps) { 703 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 704 MatFactorType type; 705 706 F = reuse_mumps->F; 707 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 708 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 709 } else { 710 F = NULL; 711 } 712 713 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 714 if (n_constraints) { 715 Mat M1,M2,M3; 716 Mat auxmat; 717 IS is_aux; 718 PetscScalar *array,*array2; 719 720 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 722 723 /* Extract constraints on R nodes: C_{CR} */ 724 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 725 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 726 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 727 728 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 729 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 730 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 731 for (i=0;i<n_constraints;i++) { 732 const PetscScalar *row_cmat_values; 733 const PetscInt *row_cmat_indices; 734 PetscInt size_of_constraint,j; 735 736 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 737 for (j=0;j<size_of_constraint;j++) { 738 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 739 } 740 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 741 } 742 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 743 if (F) { 744 Mat B; 745 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 747 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 748 ierr = MatDestroy(&B);CHKERRQ(ierr); 749 } else { 750 PetscScalar *marr; 751 752 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 753 for (i=0;i<n_constraints;i++) { 754 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 755 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 756 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 757 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 758 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 759 } 760 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 761 } 762 if (!pcbddc->switch_static) { 763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 764 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 765 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 766 for (i=0;i<n_constraints;i++) { 767 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 768 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 769 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 771 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 772 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 773 } 774 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 775 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 776 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 777 } else { 778 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 779 pcbddc->local_auxmat2 = local_auxmat2_R; 780 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 781 } 782 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 783 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 784 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 785 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 786 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 787 if (isCHOL) { 788 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 789 } else { 790 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 791 } 792 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 793 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 794 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 795 ierr = MatDestroy(&M2);CHKERRQ(ierr); 796 ierr = MatDestroy(&M3);CHKERRQ(ierr); 797 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 798 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 799 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 800 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 801 ierr = MatDestroy(&M1);CHKERRQ(ierr); 802 } 803 /* Get submatrices from subdomain matrix */ 804 if (n_vertices) { 805 IS is_aux; 806 807 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 808 IS tis; 809 810 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 811 ierr = ISSort(tis);CHKERRQ(ierr); 812 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 813 ierr = ISDestroy(&tis);CHKERRQ(ierr); 814 } else { 815 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 816 } 817 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 818 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 819 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 820 if (n_benign) { 821 IS dummy; 822 PetscInt ii[2]; 823 824 ii[0] = 0; 825 ii[1] = pcbddc->B0_ncol; 826 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 827 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 828 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 829 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 830 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 831 } 832 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 833 } 834 835 /* Matrix of coarse basis functions (local) */ 836 if (pcbddc->coarse_phi_B) { 837 PetscInt on_B,on_primal,on_D=n_D; 838 if (pcbddc->coarse_phi_D) { 839 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 840 } 841 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 842 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 843 PetscScalar *marray; 844 845 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 846 ierr = PetscFree(marray);CHKERRQ(ierr); 847 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 848 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 849 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 850 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 851 } 852 } 853 854 if (!pcbddc->coarse_phi_B) { 855 PetscScalar *marray; 856 857 n = n_B*pcbddc->local_primal_size; 858 if (pcbddc->switch_static || pcbddc->dbg_flag) { 859 n += n_D*pcbddc->local_primal_size; 860 } 861 if (!pcbddc->symmetric_primal) { 862 n *= 2; 863 } 864 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 866 n = n_B*pcbddc->local_primal_size; 867 if (pcbddc->switch_static || pcbddc->dbg_flag) { 868 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 869 n += n_D*pcbddc->local_primal_size; 870 } 871 if (!pcbddc->symmetric_primal) { 872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 873 if (pcbddc->switch_static || pcbddc->dbg_flag) { 874 n = n_B*pcbddc->local_primal_size; 875 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 876 } 877 } else { 878 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 879 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 880 if (pcbddc->switch_static || pcbddc->dbg_flag) { 881 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 882 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 883 } 884 } 885 } 886 887 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 888 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 889 const PetscInt *idxs; 890 891 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 892 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 894 } 895 /* vertices */ 896 if (n_vertices) { 897 898 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 899 900 if (n_R) { 901 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 902 Mat S_V_benign; 903 PetscBLASInt B_N,B_one = 1; 904 PetscScalar *x,*y; 905 PetscBool isseqaij; 906 907 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 908 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 909 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 910 if (F) { /* TODO could be optimized for symmetric problems */ 911 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 912 } else { 913 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 914 for (i=0;i<n_vertices;i++) { 915 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 916 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 917 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 918 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 919 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 920 } 921 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 922 } 923 if (n_benign) { 924 const PetscScalar *vals; 925 const PetscInt *idxs; 926 PetscInt n; 927 928 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr); 929 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign_2);CHKERRQ(ierr); 930 ierr = VecSet(vec_R_benign_2,0.);CHKERRQ(ierr); 931 ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 932 ierr = VecSetValues(vec_R_benign_2,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 933 ierr = VecAssemblyBegin(vec_R_benign_2);CHKERRQ(ierr); 934 ierr = VecAssemblyEnd(vec_R_benign_2);CHKERRQ(ierr); 935 ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 936 ierr = KSPSolve(pcbddc->ksp_R,vec_R_benign_2,vec_R_benign);CHKERRQ(ierr); 937 ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr); 938 } 939 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 940 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 941 if (n_constraints) { 942 Mat B; 943 944 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 945 for (i=0;i<n_vertices;i++) { 946 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 947 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 948 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 949 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 952 } 953 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 954 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 955 ierr = MatDestroy(&B);CHKERRQ(ierr); 956 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 957 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 958 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 959 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 960 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 961 ierr = MatDestroy(&B);CHKERRQ(ierr); 962 } 963 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 964 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 965 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 966 } 967 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 968 if (n_benign) { 969 ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr); 970 } 971 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 972 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 973 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 974 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 975 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 976 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 977 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 978 if (!n_benign) { 979 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 980 } else { 981 const PetscScalar *vals; 982 PetscScalar val; 983 const PetscInt *idxs; 984 PetscInt n,j; 985 986 /* Use explicit loops to populate the local coarse matrix */ 987 ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr); 988 for (j=0;j<n_vertices;j++) { 989 for (i=0;i<n_vertices;i++) { 990 coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i]; 991 } 992 } 993 ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr); 994 ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr); 995 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 996 for (j=0;j<n_vertices;j++) { 997 coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j]; 998 coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j]; 999 } 1000 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1001 ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr); 1002 ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr); 1003 ierr = VecDot(vec_R_benign_2,vec_R_benign,&val);CHKERRQ(ierr); 1004 coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = val; 1005 } 1006 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1007 } else { 1008 /* TODO benign case also here */ 1009 if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented"); 1010 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1011 } 1012 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1013 /* coarse basis functions */ 1014 for (i=0;i<n_vertices;i++) { 1015 PetscScalar *y; 1016 1017 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1018 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1019 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1020 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1021 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1022 y[n_B*i+idx_V_B[i]] = 1.0; 1023 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1024 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1025 1026 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1027 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1028 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1029 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1032 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1033 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1034 } 1035 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1036 } 1037 if (n_benign) { 1038 PetscScalar *y; 1039 1040 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1041 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr); 1042 ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043 ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1044 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1045 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1046 1047 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1048 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1049 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*n_vertices);CHKERRQ(ierr); 1050 ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1051 ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1053 y[n_D*n_vertices+p0_lidx_I] = 0.0; 1054 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1055 } 1056 } 1057 /* if n_R == 0 the object is not destroyed */ 1058 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1059 } 1060 1061 if (n_constraints) { 1062 Mat B; 1063 1064 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1065 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1066 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1067 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1068 if (n_vertices) { 1069 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1070 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1071 } else { 1072 Mat S_VCt; 1073 1074 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1075 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1077 } 1078 } 1079 ierr = MatDestroy(&B);CHKERRQ(ierr); 1080 /* coarse basis functions */ 1081 for (i=0;i<n_constraints;i++) { 1082 PetscScalar *y; 1083 1084 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1085 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1086 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1087 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1090 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1091 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1092 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1093 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1094 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1095 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1096 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1097 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1098 } 1099 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1100 } 1101 } 1102 if (n_constraints) { 1103 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1104 } 1105 1106 if (n_benign) { 1107 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1108 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1109 ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr); 1110 ierr = VecDestroy(&vec_R_benign_2);CHKERRQ(ierr); 1111 } 1112 1113 /* compute other basis functions for non-symmetric problems */ 1114 if (!pcbddc->symmetric_primal) { 1115 1116 if (n_constraints) { 1117 Mat S_CCT,B_C; 1118 1119 /* this is a lazy thing */ 1120 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1121 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1122 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1123 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1124 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1125 if (n_vertices) { 1126 Mat B_V,S_VCT; 1127 1128 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1129 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1130 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1131 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1132 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1133 } 1134 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1135 } else { /* if there are no constraints, reset work */ 1136 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1137 } 1138 if (n_vertices && n_R) { 1139 Mat A_VRT; 1140 PetscScalar *marray; 1141 PetscBLASInt B_N,B_one = 1; 1142 1143 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1144 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1145 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1146 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1147 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1148 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1149 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1150 } 1151 1152 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1153 for (i=0;i<pcbddc->local_primal_size;i++) { 1154 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1155 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1156 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1157 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1158 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1159 } 1160 } else { 1161 for (i=0;i<pcbddc->local_primal_size;i++) { 1162 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1163 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1164 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1165 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1166 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1167 } 1168 } 1169 /* coarse basis functions */ 1170 for (i=0;i<pcbddc->local_primal_size;i++) { 1171 PetscScalar *y; 1172 1173 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1174 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1175 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1176 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1178 if (i<n_vertices) { 1179 y[n_B*i+idx_V_B[i]] = 1.0; 1180 } 1181 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1182 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1183 1184 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1185 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1186 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1187 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1188 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1189 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1190 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1191 } 1192 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1193 } 1194 } 1195 /* free memory */ 1196 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1197 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1198 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1199 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1200 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1201 ierr = PetscFree(work);CHKERRQ(ierr); 1202 if (n_vertices) { 1203 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1204 } 1205 if (n_constraints) { 1206 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1207 } 1208 /* Checking coarse_sub_mat and coarse basis functios */ 1209 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1210 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1211 if (pcbddc->dbg_flag) { 1212 Mat coarse_sub_mat; 1213 Mat AUXMAT,TM1,TM2,TM3,TM4; 1214 Mat coarse_phi_D,coarse_phi_B; 1215 Mat coarse_psi_D,coarse_psi_B; 1216 Mat A_II,A_BB,A_IB,A_BI; 1217 Mat C_B,CPHI; 1218 IS is_dummy; 1219 Vec mones; 1220 MatType checkmattype=MATSEQAIJ; 1221 PetscReal real_value; 1222 1223 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1224 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1225 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1226 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1227 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1228 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1229 if (unsymmetric_check) { 1230 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1231 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1232 } 1233 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1234 1235 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1236 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1237 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1238 if (unsymmetric_check) { 1239 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1240 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1241 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1242 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1243 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1244 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1245 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1246 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1247 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1248 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1249 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1250 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1251 } else { 1252 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1253 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1254 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1255 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1256 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1257 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1258 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1259 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1260 } 1261 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1262 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1263 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1264 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1265 if (n_benign) { 1266 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1267 PetscScalar *data,*data2; 1268 1269 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1270 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1271 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1272 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1273 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1274 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1275 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1276 for (i=0;i<pcbddc->local_primal_size;i++) { 1277 data[n_vertices*pcbddc->local_primal_size+i] += data2[i]; 1278 data[i*pcbddc->local_primal_size+n_vertices] += data2[i]; 1279 } 1280 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1281 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1282 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1283 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1284 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1285 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1286 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1287 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1288 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1289 } 1290 #if 0 1291 { 1292 PetscViewer viewer; 1293 char filename[256]; 1294 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1295 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1296 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1297 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1298 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1299 } 1300 #endif 1301 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1302 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1303 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1304 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1305 1306 /* check constraints */ 1307 if (!n_benign) { 1308 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1309 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1310 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1311 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1312 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1313 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1314 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1316 if (unsymmetric_check) { 1317 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1318 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1319 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1320 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1321 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1322 } 1323 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1324 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1325 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1326 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1327 } 1328 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1329 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1330 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1331 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1332 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1333 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1334 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1335 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1336 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1337 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1338 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1339 if (unsymmetric_check) { 1340 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1341 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1342 } 1343 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1344 } 1345 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1346 /* get back data */ 1347 *coarse_submat_vals_n = coarse_submat_vals; 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1353 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1354 { 1355 Mat *work_mat; 1356 IS isrow_s,iscol_s; 1357 PetscBool rsorted,csorted; 1358 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1359 PetscErrorCode ierr; 1360 1361 PetscFunctionBegin; 1362 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1363 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1364 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1365 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1366 1367 if (!rsorted) { 1368 const PetscInt *idxs; 1369 PetscInt *idxs_sorted,i; 1370 1371 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1372 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1373 for (i=0;i<rsize;i++) { 1374 idxs_perm_r[i] = i; 1375 } 1376 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1377 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1378 for (i=0;i<rsize;i++) { 1379 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1380 } 1381 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1382 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1383 } else { 1384 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1385 isrow_s = isrow; 1386 } 1387 1388 if (!csorted) { 1389 if (isrow == iscol) { 1390 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1391 iscol_s = isrow_s; 1392 } else { 1393 const PetscInt *idxs; 1394 PetscInt *idxs_sorted,i; 1395 1396 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1397 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1398 for (i=0;i<csize;i++) { 1399 idxs_perm_c[i] = i; 1400 } 1401 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1402 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1403 for (i=0;i<csize;i++) { 1404 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1405 } 1406 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1407 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1408 } 1409 } else { 1410 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1411 iscol_s = iscol; 1412 } 1413 1414 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1415 1416 if (!rsorted || !csorted) { 1417 Mat new_mat; 1418 IS is_perm_r,is_perm_c; 1419 1420 if (!rsorted) { 1421 PetscInt *idxs_r,i; 1422 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1423 for (i=0;i<rsize;i++) { 1424 idxs_r[idxs_perm_r[i]] = i; 1425 } 1426 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1427 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1428 } else { 1429 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1430 } 1431 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1432 1433 if (!csorted) { 1434 if (isrow_s == iscol_s) { 1435 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1436 is_perm_c = is_perm_r; 1437 } else { 1438 PetscInt *idxs_c,i; 1439 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1440 for (i=0;i<csize;i++) { 1441 idxs_c[idxs_perm_c[i]] = i; 1442 } 1443 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1444 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1445 } 1446 } else { 1447 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1448 } 1449 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1450 1451 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1452 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1453 work_mat[0] = new_mat; 1454 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1455 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1456 } 1457 1458 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1459 *B = work_mat[0]; 1460 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1461 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1462 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1468 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1469 { 1470 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1471 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1472 Mat new_mat; 1473 IS is_local,is_global; 1474 PetscInt local_size; 1475 PetscBool isseqaij; 1476 PetscErrorCode ierr; 1477 1478 PetscFunctionBegin; 1479 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1480 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1482 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1483 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1484 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1485 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1486 1487 /* check */ 1488 if (pcbddc->dbg_flag) { 1489 Vec x,x_change; 1490 PetscReal error; 1491 1492 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1493 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1494 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1495 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1498 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1499 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1500 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1501 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1502 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1503 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1504 ierr = VecDestroy(&x);CHKERRQ(ierr); 1505 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1506 } 1507 1508 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1509 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1510 if (isseqaij) { 1511 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1512 } else { 1513 Mat work_mat; 1514 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1515 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1516 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1517 } 1518 if (matis->A->symmetric_set) { 1519 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1520 #if !defined(PETSC_USE_COMPLEX) 1521 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1522 #endif 1523 } 1524 /* 1525 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1526 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1527 */ 1528 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1534 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1535 { 1536 PC_IS* pcis = (PC_IS*)(pc->data); 1537 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1538 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1539 PetscInt *idx_R_local=NULL; 1540 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1541 PetscInt vbs,bs; 1542 PetscBT bitmask=NULL; 1543 PetscErrorCode ierr; 1544 1545 PetscFunctionBegin; 1546 /* 1547 No need to setup local scatters if 1548 - primal space is unchanged 1549 AND 1550 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1551 AND 1552 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1553 */ 1554 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1555 PetscFunctionReturn(0); 1556 } 1557 /* destroy old objects */ 1558 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1559 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1560 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1561 /* Set Non-overlapping dimensions */ 1562 n_B = pcis->n_B; 1563 n_D = pcis->n - n_B; 1564 n_vertices = pcbddc->n_vertices; 1565 1566 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1567 1568 /* create auxiliary bitmask and allocate workspace */ 1569 if (!sub_schurs->reuse_mumps) { 1570 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1571 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1572 for (i=0;i<n_vertices;i++) { 1573 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1574 } 1575 1576 for (i=0, n_R=0; i<pcis->n; i++) { 1577 if (!PetscBTLookup(bitmask,i)) { 1578 idx_R_local[n_R++] = i; 1579 } 1580 } 1581 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1582 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1583 1584 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1585 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1586 } 1587 1588 /* Block code */ 1589 vbs = 1; 1590 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1591 if (bs>1 && !(n_vertices%bs)) { 1592 PetscBool is_blocked = PETSC_TRUE; 1593 PetscInt *vary; 1594 if (!sub_schurs->reuse_mumps) { 1595 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1596 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1597 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1598 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1599 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1600 for (i=0; i<pcis->n/bs; i++) { 1601 if (vary[i]!=0 && vary[i]!=bs) { 1602 is_blocked = PETSC_FALSE; 1603 break; 1604 } 1605 } 1606 ierr = PetscFree(vary);CHKERRQ(ierr); 1607 } else { 1608 /* Verify directly the R set */ 1609 for (i=0; i<n_R/bs; i++) { 1610 PetscInt j,node=idx_R_local[bs*i]; 1611 for (j=1; j<bs; j++) { 1612 if (node != idx_R_local[bs*i+j]-j) { 1613 is_blocked = PETSC_FALSE; 1614 break; 1615 } 1616 } 1617 } 1618 } 1619 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1620 vbs = bs; 1621 for (i=0;i<n_R/vbs;i++) { 1622 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1623 } 1624 } 1625 } 1626 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1627 if (sub_schurs->reuse_mumps) { 1628 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1629 1630 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1631 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1632 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1633 reuse_mumps->is_R = pcbddc->is_R_local; 1634 } else { 1635 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1636 } 1637 1638 /* print some info if requested */ 1639 if (pcbddc->dbg_flag) { 1640 PetscInt benign = 0; 1641 1642 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1643 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1644 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1645 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1646 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1649 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1650 } 1651 1652 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1653 if (!sub_schurs->reuse_mumps) { 1654 IS is_aux1,is_aux2; 1655 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1656 1657 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1658 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1659 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1660 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1661 for (i=0; i<n_D; i++) { 1662 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1663 } 1664 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1665 for (i=0, j=0; i<n_R; i++) { 1666 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1667 aux_array1[j++] = i; 1668 } 1669 } 1670 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1671 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1672 for (i=0, j=0; i<n_B; i++) { 1673 if (!PetscBTLookup(bitmask,is_indices[i])) { 1674 aux_array2[j++] = i; 1675 } 1676 } 1677 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1678 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1679 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1680 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1681 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1682 1683 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1684 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1685 for (i=0, j=0; i<n_R; i++) { 1686 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1687 aux_array1[j++] = i; 1688 } 1689 } 1690 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1691 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1692 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1693 } 1694 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1695 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1696 } else { 1697 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1698 IS tis; 1699 PetscInt schur_size; 1700 1701 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1702 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1703 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1704 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1705 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1706 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1707 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1708 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1709 } 1710 } 1711 PetscFunctionReturn(0); 1712 } 1713 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1717 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1718 { 1719 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1720 PC_IS *pcis = (PC_IS*)pc->data; 1721 PC pc_temp; 1722 Mat A_RR; 1723 MatReuse reuse; 1724 PetscScalar m_one = -1.0; 1725 PetscReal value; 1726 PetscInt n_D,n_R; 1727 PetscBool use_exact,use_exact_reduced,issbaij; 1728 PetscErrorCode ierr; 1729 /* prefixes stuff */ 1730 char dir_prefix[256],neu_prefix[256],str_level[16]; 1731 size_t len; 1732 1733 PetscFunctionBegin; 1734 1735 /* compute prefixes */ 1736 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1737 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1738 if (!pcbddc->current_level) { 1739 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1740 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1741 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1742 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1743 } else { 1744 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1745 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1746 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1747 len -= 15; /* remove "pc_bddc_coarse_" */ 1748 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1749 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1750 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1751 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1752 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1753 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1754 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1755 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1756 } 1757 1758 /* DIRICHLET PROBLEM */ 1759 if (dirichlet) { 1760 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1761 if (pcbddc->local_mat->symmetric_set) { 1762 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1763 } 1764 /* Matrix for Dirichlet problem is pcis->A_II */ 1765 n_D = pcis->n - pcis->n_B; 1766 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1767 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1768 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1769 /* default */ 1770 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1771 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1772 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1773 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1774 if (issbaij) { 1775 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1776 } else { 1777 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1778 } 1779 /* Allow user's customization */ 1780 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1781 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1782 } 1783 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1784 if (sub_schurs->reuse_mumps) { 1785 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1786 1787 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1788 } 1789 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1790 if (!n_D) { 1791 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1792 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1793 } 1794 /* Set Up KSP for Dirichlet problem of BDDC */ 1795 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1796 /* set ksp_D into pcis data */ 1797 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1798 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1799 pcis->ksp_D = pcbddc->ksp_D; 1800 } 1801 1802 /* NEUMANN PROBLEM */ 1803 A_RR = 0; 1804 if (neumann) { 1805 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1806 PetscInt ibs,mbs; 1807 PetscBool issbaij; 1808 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1809 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1810 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1811 if (pcbddc->ksp_R) { /* already created ksp */ 1812 PetscInt nn_R; 1813 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1814 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1815 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1816 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1817 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1818 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1819 reuse = MAT_INITIAL_MATRIX; 1820 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1821 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1822 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1823 reuse = MAT_INITIAL_MATRIX; 1824 } else { /* safe to reuse the matrix */ 1825 reuse = MAT_REUSE_MATRIX; 1826 } 1827 } 1828 /* last check */ 1829 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1830 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1831 reuse = MAT_INITIAL_MATRIX; 1832 } 1833 } else { /* first time, so we need to create the matrix */ 1834 reuse = MAT_INITIAL_MATRIX; 1835 } 1836 /* extract A_RR */ 1837 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1838 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1839 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1840 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1841 if (matis->A == pcbddc->local_mat) { 1842 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1843 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1844 } else { 1845 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1846 } 1847 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1848 if (matis->A == pcbddc->local_mat) { 1849 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1850 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1851 } else { 1852 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1853 } 1854 } 1855 if (!sub_schurs->reuse_mumps) { 1856 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1857 if (pcbddc->local_mat->symmetric_set) { 1858 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1859 } 1860 } else { 1861 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1862 1863 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1864 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1865 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1866 } 1867 if (!pcbddc->ksp_R) { /* create object if not present */ 1868 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1869 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1870 /* default */ 1871 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1872 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1873 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1874 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1875 if (issbaij) { 1876 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1877 } else { 1878 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1879 } 1880 /* Allow user's customization */ 1881 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1882 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1883 } 1884 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1885 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1886 if (!n_R) { 1887 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1888 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1889 } 1890 /* Reuse MUMPS solver if it is present */ 1891 if (sub_schurs->reuse_mumps) { 1892 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1893 1894 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1895 } 1896 /* Set Up KSP for Neumann problem of BDDC */ 1897 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1898 } 1899 /* free Neumann problem's matrix */ 1900 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1901 1902 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1903 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1904 if (pcbddc->dbg_flag) { 1905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1906 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1907 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1908 } 1909 if (dirichlet) { /* Dirichlet */ 1910 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1911 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1912 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1913 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1914 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1915 /* need to be adapted? */ 1916 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1917 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1918 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1919 /* print info */ 1920 if (pcbddc->dbg_flag) { 1921 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1922 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1923 } 1924 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1925 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1926 } 1927 } 1928 if (neumann) { /* Neumann */ 1929 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1930 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1931 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1932 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1933 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1934 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1935 /* need to be adapted? */ 1936 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1937 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1938 /* print info */ 1939 if (pcbddc->dbg_flag) { 1940 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1941 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1942 } 1943 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1944 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1945 } 1946 } 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1953 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1954 { 1955 PetscErrorCode ierr; 1956 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1957 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1958 1959 PetscFunctionBegin; 1960 if (!sub_schurs->reuse_mumps) { 1961 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1962 } 1963 if (!pcbddc->switch_static) { 1964 if (applytranspose && pcbddc->local_auxmat1) { 1965 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1966 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1967 } 1968 if (!sub_schurs->reuse_mumps) { 1969 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1970 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1971 } else { 1972 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1973 1974 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1975 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1976 } 1977 } else { 1978 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1979 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1980 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1981 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1982 if (applytranspose && pcbddc->local_auxmat1) { 1983 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1984 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1985 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1986 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1987 } 1988 } 1989 if (!sub_schurs->reuse_mumps) { 1990 if (applytranspose) { 1991 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1992 } else { 1993 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1994 } 1995 #if defined(PETSC_HAVE_MUMPS) 1996 } else { 1997 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1998 1999 if (applytranspose) { 2000 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2001 } else { 2002 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2003 } 2004 #endif 2005 } 2006 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2007 if (!pcbddc->switch_static) { 2008 if (!sub_schurs->reuse_mumps) { 2009 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2010 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2011 } else { 2012 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2013 2014 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2015 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2016 } 2017 if (!applytranspose && pcbddc->local_auxmat1) { 2018 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2019 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2020 } 2021 } else { 2022 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2023 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2024 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2026 if (!applytranspose && pcbddc->local_auxmat1) { 2027 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2028 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2029 } 2030 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2031 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2032 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2033 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2034 } 2035 PetscFunctionReturn(0); 2036 } 2037 2038 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2039 #undef __FUNCT__ 2040 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2041 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2042 { 2043 PetscErrorCode ierr; 2044 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2045 PC_IS* pcis = (PC_IS*) (pc->data); 2046 const PetscScalar zero = 0.0; 2047 2048 PetscFunctionBegin; 2049 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2050 if (applytranspose) { 2051 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2052 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2053 } else { 2054 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2055 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2056 } 2057 2058 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2059 if (pcbddc->benign_p0_lidx >= 0) { 2060 PetscScalar *array; 2061 2062 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2063 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2064 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2065 } 2066 2067 /* start communications from local primal nodes to rhs of coarse solver */ 2068 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2069 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2070 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2071 2072 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2073 /* TODO remove null space when doing multilevel */ 2074 if (pcbddc->coarse_ksp) { 2075 Vec rhs,sol; 2076 2077 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2078 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2079 if (applytranspose) { 2080 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2081 } else { 2082 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2083 } 2084 } 2085 2086 /* Local solution on R nodes */ 2087 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2088 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2089 } 2090 2091 /* communications from coarse sol to local primal nodes */ 2092 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2093 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2094 2095 /* Sum contributions from two levels */ 2096 if (applytranspose) { 2097 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2098 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2099 } else { 2100 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2101 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2102 } 2103 /* store p0 */ 2104 if (pcbddc->benign_p0_lidx >= 0) { 2105 PetscScalar *array; 2106 2107 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2108 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2109 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2110 } 2111 PetscFunctionReturn(0); 2112 } 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2116 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2117 { 2118 PetscErrorCode ierr; 2119 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2120 PetscScalar *array; 2121 Vec from,to; 2122 2123 PetscFunctionBegin; 2124 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2125 from = pcbddc->coarse_vec; 2126 to = pcbddc->vec1_P; 2127 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2128 Vec tvec; 2129 2130 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2131 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2132 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2133 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2134 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2135 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2136 } 2137 } else { /* from local to global -> put data in coarse right hand side */ 2138 from = pcbddc->vec1_P; 2139 to = pcbddc->coarse_vec; 2140 } 2141 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2147 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2148 { 2149 PetscErrorCode ierr; 2150 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2151 PetscScalar *array; 2152 Vec from,to; 2153 2154 PetscFunctionBegin; 2155 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2156 from = pcbddc->coarse_vec; 2157 to = pcbddc->vec1_P; 2158 } else { /* from local to global -> put data in coarse right hand side */ 2159 from = pcbddc->vec1_P; 2160 to = pcbddc->coarse_vec; 2161 } 2162 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2163 if (smode == SCATTER_FORWARD) { 2164 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2165 Vec tvec; 2166 2167 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2168 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2169 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2170 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2171 } 2172 } else { 2173 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2174 ierr = VecResetArray(from);CHKERRQ(ierr); 2175 } 2176 } 2177 PetscFunctionReturn(0); 2178 } 2179 2180 /* uncomment for testing purposes */ 2181 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2184 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2185 { 2186 PetscErrorCode ierr; 2187 PC_IS* pcis = (PC_IS*)(pc->data); 2188 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2189 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2190 /* one and zero */ 2191 PetscScalar one=1.0,zero=0.0; 2192 /* space to store constraints and their local indices */ 2193 PetscScalar *constraints_data; 2194 PetscInt *constraints_idxs,*constraints_idxs_B; 2195 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2196 PetscInt *constraints_n; 2197 /* iterators */ 2198 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2199 /* BLAS integers */ 2200 PetscBLASInt lwork,lierr; 2201 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2202 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2203 /* reuse */ 2204 PetscInt olocal_primal_size,olocal_primal_size_cc; 2205 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2206 /* change of basis */ 2207 PetscBool qr_needed; 2208 PetscBT change_basis,qr_needed_idx; 2209 /* auxiliary stuff */ 2210 PetscInt *nnz,*is_indices; 2211 PetscInt ncc; 2212 /* some quantities */ 2213 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2214 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2215 2216 PetscFunctionBegin; 2217 /* Destroy Mat objects computed previously */ 2218 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2219 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2220 /* save info on constraints from previous setup (if any) */ 2221 olocal_primal_size = pcbddc->local_primal_size; 2222 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2223 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2224 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2225 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2226 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2227 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2228 2229 /* print some info */ 2230 if (pcbddc->dbg_flag) { 2231 IS vertices; 2232 PetscInt nv,nedges,nfaces; 2233 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2234 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2235 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2236 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2237 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2240 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2241 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2242 } 2243 2244 if (!pcbddc->adaptive_selection) { 2245 IS ISForVertices,*ISForFaces,*ISForEdges; 2246 MatNullSpace nearnullsp; 2247 const Vec *nearnullvecs; 2248 Vec *localnearnullsp; 2249 PetscScalar *array; 2250 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2251 PetscBool nnsp_has_cnst; 2252 /* LAPACK working arrays for SVD or POD */ 2253 PetscBool skip_lapack,boolforchange; 2254 PetscScalar *work; 2255 PetscReal *singular_vals; 2256 #if defined(PETSC_USE_COMPLEX) 2257 PetscReal *rwork; 2258 #endif 2259 #if defined(PETSC_MISSING_LAPACK_GESVD) 2260 PetscScalar *temp_basis,*correlation_mat; 2261 #else 2262 PetscBLASInt dummy_int=1; 2263 PetscScalar dummy_scalar=1.; 2264 #endif 2265 2266 /* Get index sets for faces, edges and vertices from graph */ 2267 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2268 /* free unneeded index sets */ 2269 if (!pcbddc->use_vertices) { 2270 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2271 } 2272 if (!pcbddc->use_edges) { 2273 for (i=0;i<n_ISForEdges;i++) { 2274 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2275 } 2276 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2277 n_ISForEdges = 0; 2278 } 2279 if (!pcbddc->use_faces) { 2280 for (i=0;i<n_ISForFaces;i++) { 2281 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2282 } 2283 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2284 n_ISForFaces = 0; 2285 } 2286 2287 #if defined(PETSC_USE_DEBUG) 2288 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2289 Also use_change_of_basis should be consistent among processors */ 2290 if (pcbddc->NullSpace) { 2291 PetscBool tbool[2],gbool[2]; 2292 2293 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2294 pcbddc->use_change_of_basis = PETSC_TRUE; 2295 if (!ISForEdges) { 2296 pcbddc->use_change_on_faces = PETSC_TRUE; 2297 } 2298 } 2299 tbool[0] = pcbddc->use_change_of_basis; 2300 tbool[1] = pcbddc->use_change_on_faces; 2301 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2302 pcbddc->use_change_of_basis = gbool[0]; 2303 pcbddc->use_change_on_faces = gbool[1]; 2304 } 2305 #endif 2306 2307 /* check if near null space is attached to global mat */ 2308 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2309 if (nearnullsp) { 2310 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2311 /* remove any stored info */ 2312 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2313 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2314 /* store information for BDDC solver reuse */ 2315 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2316 pcbddc->onearnullspace = nearnullsp; 2317 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2318 for (i=0;i<nnsp_size;i++) { 2319 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2320 } 2321 } else { /* if near null space is not provided BDDC uses constants by default */ 2322 nnsp_size = 0; 2323 nnsp_has_cnst = PETSC_TRUE; 2324 } 2325 /* get max number of constraints on a single cc */ 2326 max_constraints = nnsp_size; 2327 if (nnsp_has_cnst) max_constraints++; 2328 2329 /* 2330 Evaluate maximum storage size needed by the procedure 2331 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2332 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2333 There can be multiple constraints per connected component 2334 */ 2335 n_vertices = 0; 2336 if (ISForVertices) { 2337 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2338 } 2339 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2340 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2341 2342 total_counts = n_ISForFaces+n_ISForEdges; 2343 total_counts *= max_constraints; 2344 total_counts += n_vertices; 2345 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2346 2347 total_counts = 0; 2348 max_size_of_constraint = 0; 2349 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2350 IS used_is; 2351 if (i<n_ISForEdges) { 2352 used_is = ISForEdges[i]; 2353 } else { 2354 used_is = ISForFaces[i-n_ISForEdges]; 2355 } 2356 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2357 total_counts += j; 2358 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2359 } 2360 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2361 2362 /* get local part of global near null space vectors */ 2363 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2364 for (k=0;k<nnsp_size;k++) { 2365 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2366 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2367 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2368 } 2369 2370 /* whether or not to skip lapack calls */ 2371 skip_lapack = PETSC_TRUE; 2372 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2373 2374 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2375 if (!skip_lapack) { 2376 PetscScalar temp_work; 2377 2378 #if defined(PETSC_MISSING_LAPACK_GESVD) 2379 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2380 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2381 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2382 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2383 #if defined(PETSC_USE_COMPLEX) 2384 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2385 #endif 2386 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2387 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2388 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2389 lwork = -1; 2390 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2391 #if !defined(PETSC_USE_COMPLEX) 2392 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2393 #else 2394 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2395 #endif 2396 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2397 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2398 #else /* on missing GESVD */ 2399 /* SVD */ 2400 PetscInt max_n,min_n; 2401 max_n = max_size_of_constraint; 2402 min_n = max_constraints; 2403 if (max_size_of_constraint < max_constraints) { 2404 min_n = max_size_of_constraint; 2405 max_n = max_constraints; 2406 } 2407 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2408 #if defined(PETSC_USE_COMPLEX) 2409 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2410 #endif 2411 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2412 lwork = -1; 2413 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2414 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2415 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2416 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2417 #if !defined(PETSC_USE_COMPLEX) 2418 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2419 #else 2420 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2421 #endif 2422 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2423 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2424 #endif /* on missing GESVD */ 2425 /* Allocate optimal workspace */ 2426 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2427 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2428 } 2429 /* Now we can loop on constraining sets */ 2430 total_counts = 0; 2431 constraints_idxs_ptr[0] = 0; 2432 constraints_data_ptr[0] = 0; 2433 /* vertices */ 2434 if (n_vertices) { 2435 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2436 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2437 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2438 for (i=0;i<n_vertices;i++) { 2439 constraints_n[total_counts] = 1; 2440 constraints_data[total_counts] = 1.0; 2441 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2442 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2443 total_counts++; 2444 } 2445 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2446 PetscBool used_vertex; 2447 for (i=0;i<n_vertices;i++) { 2448 used_vertex = PETSC_FALSE; 2449 k = 0; 2450 while (!used_vertex && k<nnsp_size) { 2451 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2452 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2453 constraints_n[total_counts] = 1; 2454 constraints_idxs[total_counts] = is_indices[i]; 2455 constraints_data[total_counts] = 1.0; 2456 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2457 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2458 total_counts++; 2459 used_vertex = PETSC_TRUE; 2460 } 2461 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2462 k++; 2463 } 2464 } 2465 } 2466 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2467 n_vertices = total_counts; 2468 } 2469 2470 /* edges and faces */ 2471 total_counts_cc = total_counts; 2472 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2473 IS used_is; 2474 PetscBool idxs_copied = PETSC_FALSE; 2475 2476 if (ncc<n_ISForEdges) { 2477 used_is = ISForEdges[ncc]; 2478 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2479 } else { 2480 used_is = ISForFaces[ncc-n_ISForEdges]; 2481 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2482 } 2483 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2484 2485 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2486 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2487 /* change of basis should not be performed on local periodic nodes */ 2488 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2489 if (nnsp_has_cnst) { 2490 PetscScalar quad_value; 2491 2492 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2493 idxs_copied = PETSC_TRUE; 2494 2495 if (!pcbddc->use_nnsp_true) { 2496 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2497 } else { 2498 quad_value = 1.0; 2499 } 2500 for (j=0;j<size_of_constraint;j++) { 2501 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2502 } 2503 temp_constraints++; 2504 total_counts++; 2505 } 2506 for (k=0;k<nnsp_size;k++) { 2507 PetscReal real_value; 2508 PetscScalar *ptr_to_data; 2509 2510 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2511 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2512 for (j=0;j<size_of_constraint;j++) { 2513 ptr_to_data[j] = array[is_indices[j]]; 2514 } 2515 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2516 /* check if array is null on the connected component */ 2517 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2518 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2519 if (real_value > 0.0) { /* keep indices and values */ 2520 temp_constraints++; 2521 total_counts++; 2522 if (!idxs_copied) { 2523 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2524 idxs_copied = PETSC_TRUE; 2525 } 2526 } 2527 } 2528 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2529 valid_constraints = temp_constraints; 2530 if (!pcbddc->use_nnsp_true && temp_constraints) { 2531 if (temp_constraints == 1) { /* just normalize the constraint */ 2532 PetscScalar norm,*ptr_to_data; 2533 2534 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2535 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2536 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2537 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2538 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2539 } else { /* perform SVD */ 2540 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2541 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2542 2543 #if defined(PETSC_MISSING_LAPACK_GESVD) 2544 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2545 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2546 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2547 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2548 from that computed using LAPACKgesvd 2549 -> This is due to a different computation of eigenvectors in LAPACKheev 2550 -> The quality of the POD-computed basis will be the same */ 2551 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2552 /* Store upper triangular part of correlation matrix */ 2553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2554 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2555 for (j=0;j<temp_constraints;j++) { 2556 for (k=0;k<j+1;k++) { 2557 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2558 } 2559 } 2560 /* compute eigenvalues and eigenvectors of correlation matrix */ 2561 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2562 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2563 #if !defined(PETSC_USE_COMPLEX) 2564 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2565 #else 2566 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2567 #endif 2568 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2569 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2570 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2571 j = 0; 2572 while (j < temp_constraints && singular_vals[j] < tol) j++; 2573 total_counts = total_counts-j; 2574 valid_constraints = temp_constraints-j; 2575 /* scale and copy POD basis into used quadrature memory */ 2576 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2577 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2578 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2579 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2580 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2581 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2582 if (j<temp_constraints) { 2583 PetscInt ii; 2584 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2585 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2586 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2587 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2588 for (k=0;k<temp_constraints-j;k++) { 2589 for (ii=0;ii<size_of_constraint;ii++) { 2590 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2591 } 2592 } 2593 } 2594 #else /* on missing GESVD */ 2595 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2596 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2597 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2598 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2599 #if !defined(PETSC_USE_COMPLEX) 2600 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2601 #else 2602 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2603 #endif 2604 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2605 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2606 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2607 k = temp_constraints; 2608 if (k > size_of_constraint) k = size_of_constraint; 2609 j = 0; 2610 while (j < k && singular_vals[k-j-1] < tol) j++; 2611 valid_constraints = k-j; 2612 total_counts = total_counts-temp_constraints+valid_constraints; 2613 #endif /* on missing GESVD */ 2614 } 2615 } 2616 /* update pointers information */ 2617 if (valid_constraints) { 2618 constraints_n[total_counts_cc] = valid_constraints; 2619 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2620 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2621 /* set change_of_basis flag */ 2622 if (boolforchange) { 2623 PetscBTSet(change_basis,total_counts_cc); 2624 } 2625 total_counts_cc++; 2626 } 2627 } 2628 /* free workspace */ 2629 if (!skip_lapack) { 2630 ierr = PetscFree(work);CHKERRQ(ierr); 2631 #if defined(PETSC_USE_COMPLEX) 2632 ierr = PetscFree(rwork);CHKERRQ(ierr); 2633 #endif 2634 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2635 #if defined(PETSC_MISSING_LAPACK_GESVD) 2636 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2637 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2638 #endif 2639 } 2640 for (k=0;k<nnsp_size;k++) { 2641 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2642 } 2643 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2644 /* free index sets of faces, edges and vertices */ 2645 for (i=0;i<n_ISForFaces;i++) { 2646 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2647 } 2648 if (n_ISForFaces) { 2649 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2650 } 2651 for (i=0;i<n_ISForEdges;i++) { 2652 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2653 } 2654 if (n_ISForEdges) { 2655 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2656 } 2657 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2658 } else { 2659 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2660 2661 total_counts = 0; 2662 n_vertices = 0; 2663 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2664 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2665 } 2666 max_constraints = 0; 2667 total_counts_cc = 0; 2668 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2669 total_counts += pcbddc->adaptive_constraints_n[i]; 2670 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2671 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2672 } 2673 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2674 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2675 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2676 constraints_data = pcbddc->adaptive_constraints_data; 2677 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2678 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2679 total_counts_cc = 0; 2680 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2681 if (pcbddc->adaptive_constraints_n[i]) { 2682 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2683 } 2684 } 2685 #if 0 2686 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2687 for (i=0;i<total_counts_cc;i++) { 2688 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2689 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2690 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2691 printf(" %d",constraints_idxs[j]); 2692 } 2693 printf("\n"); 2694 printf("number of cc: %d\n",constraints_n[i]); 2695 } 2696 for (i=0;i<n_vertices;i++) { 2697 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2698 } 2699 for (i=0;i<sub_schurs->n_subs;i++) { 2700 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2701 } 2702 #endif 2703 2704 max_size_of_constraint = 0; 2705 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2706 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2707 /* Change of basis */ 2708 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2709 if (pcbddc->use_change_of_basis) { 2710 for (i=0;i<sub_schurs->n_subs;i++) { 2711 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2712 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2713 } 2714 } 2715 } 2716 } 2717 pcbddc->local_primal_size = total_counts; 2718 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2719 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2720 2721 /* map constraints_idxs in boundary numbering */ 2722 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2723 if (i != constraints_idxs_ptr[total_counts_cc]) { 2724 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2725 } 2726 2727 /* Create constraint matrix */ 2728 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2729 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2730 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2731 2732 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2733 /* determine if a QR strategy is needed for change of basis */ 2734 qr_needed = PETSC_FALSE; 2735 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2736 total_primal_vertices=0; 2737 pcbddc->local_primal_size_cc = 0; 2738 for (i=0;i<total_counts_cc;i++) { 2739 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2740 if (size_of_constraint == 1) { 2741 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2742 pcbddc->local_primal_size_cc += 1; 2743 } else if (PetscBTLookup(change_basis,i)) { 2744 for (k=0;k<constraints_n[i];k++) { 2745 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2746 } 2747 pcbddc->local_primal_size_cc += constraints_n[i]; 2748 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2749 PetscBTSet(qr_needed_idx,i); 2750 qr_needed = PETSC_TRUE; 2751 } 2752 } else { 2753 pcbddc->local_primal_size_cc += 1; 2754 } 2755 } 2756 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2757 pcbddc->n_vertices = total_primal_vertices; 2758 /* permute indices in order to have a sorted set of vertices */ 2759 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2760 2761 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2762 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2763 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2764 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2765 2766 /* nonzero structure of constraint matrix */ 2767 /* and get reference dof for local constraints */ 2768 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2769 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2770 2771 j = total_primal_vertices; 2772 total_counts = total_primal_vertices; 2773 cum = total_primal_vertices; 2774 for (i=n_vertices;i<total_counts_cc;i++) { 2775 if (!PetscBTLookup(change_basis,i)) { 2776 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2777 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2778 cum++; 2779 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2780 for (k=0;k<constraints_n[i];k++) { 2781 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2782 nnz[j+k] = size_of_constraint; 2783 } 2784 j += constraints_n[i]; 2785 } 2786 } 2787 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2788 ierr = PetscFree(nnz);CHKERRQ(ierr); 2789 2790 /* set values in constraint matrix */ 2791 for (i=0;i<total_primal_vertices;i++) { 2792 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2793 } 2794 total_counts = total_primal_vertices; 2795 for (i=n_vertices;i<total_counts_cc;i++) { 2796 if (!PetscBTLookup(change_basis,i)) { 2797 PetscInt *cols; 2798 2799 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2800 cols = constraints_idxs+constraints_idxs_ptr[i]; 2801 for (k=0;k<constraints_n[i];k++) { 2802 PetscInt row = total_counts+k; 2803 PetscScalar *vals; 2804 2805 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2806 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2807 } 2808 total_counts += constraints_n[i]; 2809 } 2810 } 2811 /* assembling */ 2812 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2813 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2814 2815 /* 2816 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2817 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2818 */ 2819 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2820 if (pcbddc->use_change_of_basis) { 2821 /* dual and primal dofs on a single cc */ 2822 PetscInt dual_dofs,primal_dofs; 2823 /* working stuff for GEQRF */ 2824 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2825 PetscBLASInt lqr_work; 2826 /* working stuff for UNGQR */ 2827 PetscScalar *gqr_work,lgqr_work_t; 2828 PetscBLASInt lgqr_work; 2829 /* working stuff for TRTRS */ 2830 PetscScalar *trs_rhs; 2831 PetscBLASInt Blas_NRHS; 2832 /* pointers for values insertion into change of basis matrix */ 2833 PetscInt *start_rows,*start_cols; 2834 PetscScalar *start_vals; 2835 /* working stuff for values insertion */ 2836 PetscBT is_primal; 2837 PetscInt *aux_primal_numbering_B; 2838 /* matrix sizes */ 2839 PetscInt global_size,local_size; 2840 /* temporary change of basis */ 2841 Mat localChangeOfBasisMatrix; 2842 /* extra space for debugging */ 2843 PetscScalar *dbg_work; 2844 2845 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2846 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2847 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2848 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2849 /* nonzeros for local mat */ 2850 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2851 for (i=0;i<pcis->n;i++) nnz[i]=1; 2852 for (i=n_vertices;i<total_counts_cc;i++) { 2853 if (PetscBTLookup(change_basis,i)) { 2854 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2855 if (PetscBTLookup(qr_needed_idx,i)) { 2856 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2857 } else { 2858 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2859 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2860 } 2861 } 2862 } 2863 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2864 ierr = PetscFree(nnz);CHKERRQ(ierr); 2865 /* Set initial identity in the matrix */ 2866 for (i=0;i<pcis->n;i++) { 2867 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2868 } 2869 2870 if (pcbddc->dbg_flag) { 2871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2872 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2873 } 2874 2875 2876 /* Now we loop on the constraints which need a change of basis */ 2877 /* 2878 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2879 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2880 2881 Basic blocks of change of basis matrix T computed by 2882 2883 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2884 2885 | 1 0 ... 0 s_1/S | 2886 | 0 1 ... 0 s_2/S | 2887 | ... | 2888 | 0 ... 1 s_{n-1}/S | 2889 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2890 2891 with S = \sum_{i=1}^n s_i^2 2892 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2893 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2894 2895 - QR decomposition of constraints otherwise 2896 */ 2897 if (qr_needed) { 2898 /* space to store Q */ 2899 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2900 /* first we issue queries for optimal work */ 2901 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2902 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2903 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2904 lqr_work = -1; 2905 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2906 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2907 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2908 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2909 lgqr_work = -1; 2910 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2911 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2912 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2913 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2914 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2915 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2916 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2917 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2918 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2919 /* array to store scaling factors for reflectors */ 2920 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2921 /* array to store rhs and solution of triangular solver */ 2922 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2923 /* allocating workspace for check */ 2924 if (pcbddc->dbg_flag) { 2925 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2926 } 2927 } 2928 /* array to store whether a node is primal or not */ 2929 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2930 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2931 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2932 if (i != total_primal_vertices) { 2933 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2934 } 2935 for (i=0;i<total_primal_vertices;i++) { 2936 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2937 } 2938 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2939 2940 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2941 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2942 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2943 if (PetscBTLookup(change_basis,total_counts)) { 2944 /* get constraint info */ 2945 primal_dofs = constraints_n[total_counts]; 2946 dual_dofs = size_of_constraint-primal_dofs; 2947 2948 if (pcbddc->dbg_flag) { 2949 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2950 } 2951 2952 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2953 2954 /* copy quadrature constraints for change of basis check */ 2955 if (pcbddc->dbg_flag) { 2956 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2957 } 2958 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2959 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2960 2961 /* compute QR decomposition of constraints */ 2962 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2963 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2964 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2965 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2966 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2967 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2968 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2969 2970 /* explictly compute R^-T */ 2971 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2972 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2973 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2974 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2975 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2976 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2977 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2978 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2979 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2980 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2981 2982 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2983 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2984 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2985 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2986 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2987 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2988 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2989 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2990 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2991 2992 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2993 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2994 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2995 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2996 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2997 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2998 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2999 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3000 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3001 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3002 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3003 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3004 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3005 3006 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3007 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3008 /* insert cols for primal dofs */ 3009 for (j=0;j<primal_dofs;j++) { 3010 start_vals = &qr_basis[j*size_of_constraint]; 3011 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3012 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3013 } 3014 /* insert cols for dual dofs */ 3015 for (j=0,k=0;j<dual_dofs;k++) { 3016 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3017 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3018 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3019 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3020 j++; 3021 } 3022 } 3023 3024 /* check change of basis */ 3025 if (pcbddc->dbg_flag) { 3026 PetscInt ii,jj; 3027 PetscBool valid_qr=PETSC_TRUE; 3028 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3029 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3030 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3031 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3032 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3033 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3034 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3035 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3036 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3037 for (jj=0;jj<size_of_constraint;jj++) { 3038 for (ii=0;ii<primal_dofs;ii++) { 3039 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3040 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3041 } 3042 } 3043 if (!valid_qr) { 3044 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3045 for (jj=0;jj<size_of_constraint;jj++) { 3046 for (ii=0;ii<primal_dofs;ii++) { 3047 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3048 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3049 } 3050 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3051 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3052 } 3053 } 3054 } 3055 } else { 3056 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3057 } 3058 } 3059 } else { /* simple transformation block */ 3060 PetscInt row,col; 3061 PetscScalar val,norm; 3062 3063 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3064 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3065 for (j=0;j<size_of_constraint;j++) { 3066 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3067 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3068 if (!PetscBTLookup(is_primal,row_B)) { 3069 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3070 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3071 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3072 } else { 3073 for (k=0;k<size_of_constraint;k++) { 3074 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3075 if (row != col) { 3076 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3077 } else { 3078 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3079 } 3080 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3081 } 3082 } 3083 } 3084 if (pcbddc->dbg_flag) { 3085 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3086 } 3087 } 3088 } else { 3089 if (pcbddc->dbg_flag) { 3090 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3091 } 3092 } 3093 } 3094 3095 /* free workspace */ 3096 if (qr_needed) { 3097 if (pcbddc->dbg_flag) { 3098 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3099 } 3100 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3101 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3102 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3103 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3104 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3105 } 3106 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3107 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3108 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 3110 /* assembling of global change of variable */ 3111 { 3112 Mat tmat; 3113 PetscInt bs; 3114 3115 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3116 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3117 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3118 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3119 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3120 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3121 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3122 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3123 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3124 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3125 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3126 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3127 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3128 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3129 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3130 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3131 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3132 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3133 } 3134 /* check */ 3135 if (pcbddc->dbg_flag) { 3136 PetscReal error; 3137 Vec x,x_change; 3138 3139 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3140 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3141 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3142 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3143 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3144 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3145 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3146 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3147 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3148 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3149 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3150 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3151 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3152 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3153 ierr = VecDestroy(&x);CHKERRQ(ierr); 3154 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3155 } 3156 3157 /* adapt sub_schurs computed (if any) */ 3158 if (pcbddc->use_deluxe_scaling) { 3159 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3160 if (sub_schurs->S_Ej_all) { 3161 Mat S_new,tmat; 3162 ISLocalToGlobalMapping NtoSall; 3163 IS is_all_N,is_V,is_V_Sall; 3164 const PetscScalar *array; 3165 const PetscInt *idxs_V,*idxs_all; 3166 PetscInt i,n_V; 3167 3168 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3169 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3170 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3171 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3172 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3173 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3174 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3175 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3176 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3177 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3178 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3179 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3180 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3181 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3182 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3183 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3184 for (i=0;i<n_V;i++) { 3185 PetscScalar val; 3186 PetscInt idx; 3187 3188 idx = idxs_V[i]; 3189 val = array[idxs_all[idxs_V[i]]]; 3190 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3191 } 3192 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3193 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3194 sub_schurs->S_Ej_all = S_new; 3195 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3196 if (sub_schurs->sum_S_Ej_all) { 3197 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3198 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3199 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3200 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3201 sub_schurs->sum_S_Ej_all = S_new; 3202 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3203 } 3204 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3205 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3206 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3207 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3208 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3209 } 3210 } 3211 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3212 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3213 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3214 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3215 } 3216 3217 /* set up change of basis context */ 3218 if (pcbddc->ChangeOfBasisMatrix) { 3219 PCBDDCChange_ctx change_ctx; 3220 3221 if (!pcbddc->new_global_mat) { 3222 PetscInt global_size,local_size; 3223 3224 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3225 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3226 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3227 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3228 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3229 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3230 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3231 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3232 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3233 } else { 3234 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3235 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3236 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3237 } 3238 if (!pcbddc->user_ChangeOfBasisMatrix) { 3239 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3240 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3241 } else { 3242 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3243 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3244 } 3245 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3246 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3247 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3248 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3249 } 3250 3251 /* add pressure dof to set of primal nodes for numbering purposes */ 3252 if (pcbddc->benign_p0_lidx >= 0) { 3253 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3254 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3255 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3256 pcbddc->local_primal_size_cc++; 3257 pcbddc->local_primal_size++; 3258 } 3259 3260 /* check if a new primal space has been introduced (also take into account benign trick) */ 3261 pcbddc->new_primal_space_local = PETSC_TRUE; 3262 if (olocal_primal_size == pcbddc->local_primal_size) { 3263 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3264 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3265 if (!pcbddc->new_primal_space_local) { 3266 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3267 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3268 } 3269 } 3270 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3271 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3272 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3273 3274 /* flush dbg viewer */ 3275 if (pcbddc->dbg_flag) { 3276 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3277 } 3278 3279 /* free workspace */ 3280 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3281 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3282 if (!pcbddc->adaptive_selection) { 3283 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3284 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3285 } else { 3286 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3287 pcbddc->adaptive_constraints_idxs_ptr, 3288 pcbddc->adaptive_constraints_data_ptr, 3289 pcbddc->adaptive_constraints_idxs, 3290 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3291 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3292 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3293 } 3294 PetscFunctionReturn(0); 3295 } 3296 3297 #undef __FUNCT__ 3298 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3299 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3300 { 3301 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3302 PC_IS *pcis = (PC_IS*)pc->data; 3303 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3304 PetscInt ierr,i,vertex_size,N; 3305 PetscViewer viewer=pcbddc->dbg_viewer; 3306 3307 PetscFunctionBegin; 3308 /* Reset previously computed graph */ 3309 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3310 /* Init local Graph struct */ 3311 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3312 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3313 3314 /* Check validity of the csr graph passed in by the user */ 3315 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3316 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3317 } 3318 3319 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3320 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3321 PetscInt *xadj,*adjncy; 3322 PetscInt nvtxs; 3323 PetscBool flg_row=PETSC_FALSE; 3324 3325 if (pcbddc->use_local_adj) { 3326 3327 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3328 if (flg_row) { 3329 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3330 pcbddc->computed_rowadj = PETSC_TRUE; 3331 } 3332 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3333 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3334 IS is_dummy; 3335 ISLocalToGlobalMapping l2gmap_dummy; 3336 PetscInt j,sum; 3337 PetscInt *cxadj,*cadjncy; 3338 const PetscInt *idxs; 3339 PCBDDCGraph graph; 3340 PetscBT is_on_boundary; 3341 3342 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3343 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3344 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3345 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3346 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3347 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3348 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3349 if (flg_row) { 3350 graph->xadj = xadj; 3351 graph->adjncy = adjncy; 3352 } 3353 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3354 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3355 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3356 3357 if (pcbddc->dbg_flag) { 3358 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3359 for (i=0;i<graph->ncc;i++) { 3360 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3361 } 3362 } 3363 3364 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3365 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3366 for (i=0;i<pcis->n_B;i++) { 3367 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3368 } 3369 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3370 3371 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3372 sum = 0; 3373 for (i=0;i<graph->ncc;i++) { 3374 PetscInt sizecc = 0; 3375 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3376 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3377 sizecc++; 3378 } 3379 } 3380 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3381 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3382 cxadj[graph->queue[j]] = sizecc; 3383 } 3384 } 3385 sum += sizecc*sizecc; 3386 } 3387 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3388 sum = 0; 3389 for (i=0;i<pcis->n;i++) { 3390 PetscInt temp = cxadj[i]; 3391 cxadj[i] = sum; 3392 sum += temp; 3393 } 3394 cxadj[pcis->n] = sum; 3395 for (i=0;i<graph->ncc;i++) { 3396 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3397 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3398 PetscInt k,sizecc = 0; 3399 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3400 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3401 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3402 sizecc++; 3403 } 3404 } 3405 } 3406 } 3407 } 3408 if (sum) { 3409 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3410 } else { 3411 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3412 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3413 } 3414 graph->xadj = 0; 3415 graph->adjncy = 0; 3416 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3417 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3418 } 3419 } 3420 if (pcbddc->dbg_flag) { 3421 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3422 } 3423 3424 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3425 vertex_size = 1; 3426 if (pcbddc->user_provided_isfordofs) { 3427 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3428 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3429 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3430 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3431 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3432 } 3433 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3434 pcbddc->n_ISForDofs = 0; 3435 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3436 } 3437 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3438 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3439 } else { 3440 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3441 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3442 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3443 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3444 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3445 } 3446 } 3447 } 3448 3449 /* Setup of Graph */ 3450 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3451 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3452 } 3453 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3454 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3455 } 3456 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3457 3458 /* Graph's connected components analysis */ 3459 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3460 3461 /* print some info to stdout */ 3462 if (pcbddc->dbg_flag) { 3463 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3464 } 3465 3466 /* mark topography has done */ 3467 pcbddc->recompute_topography = PETSC_FALSE; 3468 PetscFunctionReturn(0); 3469 } 3470 3471 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3472 #undef __FUNCT__ 3473 #define __FUNCT__ "PCBDDCSubsetNumbering" 3474 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3475 { 3476 PetscSF sf; 3477 PetscLayout map; 3478 const PetscInt *idxs; 3479 PetscInt *leaf_data,*root_data,*gidxs; 3480 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3481 PetscInt n_n,nlocals,start,first_index; 3482 PetscMPIInt commsize; 3483 PetscBool first_found; 3484 PetscErrorCode ierr; 3485 3486 PetscFunctionBegin; 3487 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3488 if (subset_mult) { 3489 PetscCheckSameComm(subset,1,subset_mult,2); 3490 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3491 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3492 } 3493 /* create workspace layout for computing global indices of subset */ 3494 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3495 lbounds[0] = lbounds[1] = 0; 3496 for (i=0;i<n;i++) { 3497 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3498 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3499 } 3500 lbounds[0] = -lbounds[0]; 3501 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3502 gbounds[0] = -gbounds[0]; 3503 N = gbounds[1] - gbounds[0] + 1; 3504 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3505 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3506 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3507 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3508 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3509 3510 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3511 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3512 if (subset_mult) { 3513 const PetscInt* idxs_mult; 3514 3515 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3516 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3517 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3518 } else { 3519 for (i=0;i<n;i++) leaf_data[i] = 1; 3520 } 3521 /* local size of new subset */ 3522 n_n = 0; 3523 for (i=0;i<n;i++) n_n += leaf_data[i]; 3524 3525 /* global indexes in layout */ 3526 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3527 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3528 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3529 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3530 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3531 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3532 3533 /* reduce from leaves to roots */ 3534 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3535 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3536 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3537 3538 /* count indexes in local part of layout */ 3539 nlocals = 0; 3540 first_index = -1; 3541 first_found = PETSC_FALSE; 3542 for (i=0;i<Nl;i++) { 3543 if (!first_found && root_data[i]) { 3544 first_found = PETSC_TRUE; 3545 first_index = i; 3546 } 3547 nlocals += root_data[i]; 3548 } 3549 3550 /* cumulative of number of indexes and size of subset without holes */ 3551 #if defined(PETSC_HAVE_MPI_EXSCAN) 3552 start = 0; 3553 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3554 #else 3555 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3556 start = start-nlocals; 3557 #endif 3558 3559 if (N_n) { /* compute total size of new subset if requested */ 3560 *N_n = start + nlocals; 3561 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3562 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3563 } 3564 3565 /* adapt root data with cumulative */ 3566 if (first_found) { 3567 PetscInt old_index; 3568 3569 root_data[first_index] += start; 3570 old_index = first_index; 3571 for (i=first_index+1;i<Nl;i++) { 3572 if (root_data[i]) { 3573 root_data[i] += root_data[old_index]; 3574 old_index = i; 3575 } 3576 } 3577 } 3578 3579 /* from roots to leaves */ 3580 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3581 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3582 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3583 3584 /* create new IS with global indexes without holes */ 3585 if (subset_mult) { 3586 const PetscInt* idxs_mult; 3587 PetscInt cum; 3588 3589 cum = 0; 3590 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3591 for (i=0;i<n;i++) { 3592 PetscInt j; 3593 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3594 } 3595 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3596 } else { 3597 for (i=0;i<n;i++) { 3598 gidxs[i] = leaf_data[i]-1; 3599 } 3600 } 3601 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3602 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3603 PetscFunctionReturn(0); 3604 } 3605 3606 #undef __FUNCT__ 3607 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3608 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3609 { 3610 PetscInt i,j; 3611 PetscScalar *alphas; 3612 PetscErrorCode ierr; 3613 3614 PetscFunctionBegin; 3615 /* this implements stabilized Gram-Schmidt */ 3616 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3617 for (i=0;i<n;i++) { 3618 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3619 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3620 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3621 } 3622 ierr = PetscFree(alphas);CHKERRQ(ierr); 3623 PetscFunctionReturn(0); 3624 } 3625 3626 #undef __FUNCT__ 3627 #define __FUNCT__ "MatISGetSubassemblingPattern" 3628 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3629 { 3630 IS ranks_send_to; 3631 PetscInt n_neighs,*neighs,*n_shared,**shared; 3632 PetscMPIInt size,rank,color; 3633 PetscInt *xadj,*adjncy; 3634 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3635 PetscInt i,local_size,threshold=0; 3636 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3637 PetscSubcomm subcomm; 3638 PetscErrorCode ierr; 3639 3640 PetscFunctionBegin; 3641 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3642 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3643 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3644 3645 /* Get info on mapping */ 3646 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3647 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3648 3649 /* build local CSR graph of subdomains' connectivity */ 3650 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3651 xadj[0] = 0; 3652 xadj[1] = PetscMax(n_neighs-1,0); 3653 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3654 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3655 3656 if (threshold) { 3657 PetscInt xadj_count = 0; 3658 for (i=1;i<n_neighs;i++) { 3659 if (n_shared[i] > threshold) { 3660 adjncy[xadj_count] = neighs[i]; 3661 adjncy_wgt[xadj_count] = n_shared[i]; 3662 xadj_count++; 3663 } 3664 } 3665 xadj[1] = xadj_count; 3666 } else { 3667 if (xadj[1]) { 3668 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3669 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3670 } 3671 } 3672 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3673 if (use_square) { 3674 for (i=0;i<xadj[1];i++) { 3675 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3676 } 3677 } 3678 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3679 3680 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3681 3682 /* 3683 Restrict work on active processes only. 3684 */ 3685 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3686 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3687 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3688 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3689 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3690 if (color) { 3691 ierr = PetscFree(xadj);CHKERRQ(ierr); 3692 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3693 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3694 } else { 3695 Mat subdomain_adj; 3696 IS new_ranks,new_ranks_contig; 3697 MatPartitioning partitioner; 3698 PetscInt prank,rstart=0,rend=0; 3699 PetscInt *is_indices,*oldranks; 3700 PetscBool aggregate; 3701 3702 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3703 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3704 prank = rank; 3705 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3706 /* 3707 for (i=0;i<size;i++) { 3708 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3709 } 3710 */ 3711 for (i=0;i<xadj[1];i++) { 3712 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3713 } 3714 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3715 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3716 if (aggregate) { 3717 PetscInt lrows,row,ncols,*cols; 3718 PetscMPIInt nrank; 3719 PetscScalar *vals; 3720 3721 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3722 lrows = 0; 3723 if (nrank<redprocs) { 3724 lrows = size/redprocs; 3725 if (nrank<size%redprocs) lrows++; 3726 } 3727 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3728 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3729 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3730 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3731 row = nrank; 3732 ncols = xadj[1]-xadj[0]; 3733 cols = adjncy; 3734 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3735 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3736 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3737 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3738 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3739 ierr = PetscFree(xadj);CHKERRQ(ierr); 3740 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3741 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3742 ierr = PetscFree(vals);CHKERRQ(ierr); 3743 } else { 3744 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3745 } 3746 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3747 3748 /* Partition */ 3749 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3750 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3751 if (use_vwgt) { 3752 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3753 v_wgt[0] = local_size; 3754 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3755 } 3756 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3757 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3758 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3759 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3760 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3761 3762 /* renumber new_ranks to avoid "holes" in new set of processors */ 3763 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3764 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3765 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3766 if (!redprocs) { 3767 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3768 } else { 3769 PetscInt idxs[1]; 3770 PetscMPIInt tag; 3771 MPI_Request *reqs; 3772 3773 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3774 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3775 for (i=rstart;i<rend;i++) { 3776 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3777 } 3778 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3779 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3780 ierr = PetscFree(reqs);CHKERRQ(ierr); 3781 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3782 } 3783 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3784 /* clean up */ 3785 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3786 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3787 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3788 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3789 } 3790 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3791 3792 /* assemble parallel IS for sends */ 3793 i = 1; 3794 if (color) i=0; 3795 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3796 /* get back IS */ 3797 *is_sends = ranks_send_to; 3798 PetscFunctionReturn(0); 3799 } 3800 3801 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3802 3803 #undef __FUNCT__ 3804 #define __FUNCT__ "MatISSubassemble" 3805 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3806 { 3807 Mat local_mat; 3808 IS is_sends_internal; 3809 PetscInt rows,cols,new_local_rows; 3810 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3811 PetscBool ismatis,isdense,newisdense,destroy_mat; 3812 ISLocalToGlobalMapping l2gmap; 3813 PetscInt* l2gmap_indices; 3814 const PetscInt* is_indices; 3815 MatType new_local_type; 3816 /* buffers */ 3817 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3818 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3819 PetscInt *recv_buffer_idxs_local; 3820 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3821 /* MPI */ 3822 MPI_Comm comm,comm_n; 3823 PetscSubcomm subcomm; 3824 PetscMPIInt n_sends,n_recvs,commsize; 3825 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3826 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3827 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3828 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3829 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3830 PetscErrorCode ierr; 3831 3832 PetscFunctionBegin; 3833 /* TODO: add missing checks */ 3834 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3835 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3836 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3837 PetscValidLogicalCollectiveInt(mat,nis,7); 3838 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3839 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3840 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3841 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3842 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3843 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3844 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3845 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3846 PetscInt mrows,mcols,mnrows,mncols; 3847 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3848 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3849 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3850 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3851 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3852 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3853 } 3854 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3855 PetscValidLogicalCollectiveInt(mat,bs,0); 3856 /* prepare IS for sending if not provided */ 3857 if (!is_sends) { 3858 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3859 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3860 } else { 3861 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3862 is_sends_internal = is_sends; 3863 } 3864 3865 /* get comm */ 3866 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3867 3868 /* compute number of sends */ 3869 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3870 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3871 3872 /* compute number of receives */ 3873 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3874 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3875 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3876 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3877 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3878 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3879 ierr = PetscFree(iflags);CHKERRQ(ierr); 3880 3881 /* restrict comm if requested */ 3882 subcomm = 0; 3883 destroy_mat = PETSC_FALSE; 3884 if (restrict_comm) { 3885 PetscMPIInt color,subcommsize; 3886 3887 color = 0; 3888 if (restrict_full) { 3889 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3890 } else { 3891 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3892 } 3893 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3894 subcommsize = commsize - subcommsize; 3895 /* check if reuse has been requested */ 3896 if (reuse == MAT_REUSE_MATRIX) { 3897 if (*mat_n) { 3898 PetscMPIInt subcommsize2; 3899 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3900 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3901 comm_n = PetscObjectComm((PetscObject)*mat_n); 3902 } else { 3903 comm_n = PETSC_COMM_SELF; 3904 } 3905 } else { /* MAT_INITIAL_MATRIX */ 3906 PetscMPIInt rank; 3907 3908 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3909 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3910 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3911 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3912 comm_n = PetscSubcommChild(subcomm); 3913 } 3914 /* flag to destroy *mat_n if not significative */ 3915 if (color) destroy_mat = PETSC_TRUE; 3916 } else { 3917 comm_n = comm; 3918 } 3919 3920 /* prepare send/receive buffers */ 3921 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3922 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3923 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3924 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3925 if (nis) { 3926 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3927 } 3928 3929 /* Get data from local matrices */ 3930 if (!isdense) { 3931 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3932 /* TODO: See below some guidelines on how to prepare the local buffers */ 3933 /* 3934 send_buffer_vals should contain the raw values of the local matrix 3935 send_buffer_idxs should contain: 3936 - MatType_PRIVATE type 3937 - PetscInt size_of_l2gmap 3938 - PetscInt global_row_indices[size_of_l2gmap] 3939 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3940 */ 3941 } else { 3942 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3943 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3944 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3945 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3946 send_buffer_idxs[1] = i; 3947 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3948 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3949 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3950 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3951 for (i=0;i<n_sends;i++) { 3952 ilengths_vals[is_indices[i]] = len*len; 3953 ilengths_idxs[is_indices[i]] = len+2; 3954 } 3955 } 3956 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3957 /* additional is (if any) */ 3958 if (nis) { 3959 PetscMPIInt psum; 3960 PetscInt j; 3961 for (j=0,psum=0;j<nis;j++) { 3962 PetscInt plen; 3963 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3964 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3965 psum += len+1; /* indices + lenght */ 3966 } 3967 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3968 for (j=0,psum=0;j<nis;j++) { 3969 PetscInt plen; 3970 const PetscInt *is_array_idxs; 3971 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3972 send_buffer_idxs_is[psum] = plen; 3973 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3974 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3975 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3976 psum += plen+1; /* indices + lenght */ 3977 } 3978 for (i=0;i<n_sends;i++) { 3979 ilengths_idxs_is[is_indices[i]] = psum; 3980 } 3981 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3982 } 3983 3984 buf_size_idxs = 0; 3985 buf_size_vals = 0; 3986 buf_size_idxs_is = 0; 3987 for (i=0;i<n_recvs;i++) { 3988 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3989 buf_size_vals += (PetscInt)olengths_vals[i]; 3990 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3991 } 3992 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3993 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3994 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3995 3996 /* get new tags for clean communications */ 3997 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3998 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3999 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4000 4001 /* allocate for requests */ 4002 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4003 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4004 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4005 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4006 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4007 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4008 4009 /* communications */ 4010 ptr_idxs = recv_buffer_idxs; 4011 ptr_vals = recv_buffer_vals; 4012 ptr_idxs_is = recv_buffer_idxs_is; 4013 for (i=0;i<n_recvs;i++) { 4014 source_dest = onodes[i]; 4015 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4016 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4017 ptr_idxs += olengths_idxs[i]; 4018 ptr_vals += olengths_vals[i]; 4019 if (nis) { 4020 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4021 ptr_idxs_is += olengths_idxs_is[i]; 4022 } 4023 } 4024 for (i=0;i<n_sends;i++) { 4025 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4026 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4027 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4028 if (nis) { 4029 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4030 } 4031 } 4032 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4033 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4034 4035 /* assemble new l2g map */ 4036 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4037 ptr_idxs = recv_buffer_idxs; 4038 new_local_rows = 0; 4039 for (i=0;i<n_recvs;i++) { 4040 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4041 ptr_idxs += olengths_idxs[i]; 4042 } 4043 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4044 ptr_idxs = recv_buffer_idxs; 4045 new_local_rows = 0; 4046 for (i=0;i<n_recvs;i++) { 4047 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4048 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4049 ptr_idxs += olengths_idxs[i]; 4050 } 4051 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4052 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4053 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4054 4055 /* infer new local matrix type from received local matrices type */ 4056 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4057 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4058 if (n_recvs) { 4059 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4060 ptr_idxs = recv_buffer_idxs; 4061 for (i=0;i<n_recvs;i++) { 4062 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4063 new_local_type_private = MATAIJ_PRIVATE; 4064 break; 4065 } 4066 ptr_idxs += olengths_idxs[i]; 4067 } 4068 switch (new_local_type_private) { 4069 case MATDENSE_PRIVATE: 4070 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4071 new_local_type = MATSEQAIJ; 4072 bs = 1; 4073 } else { /* if I receive only 1 dense matrix */ 4074 new_local_type = MATSEQDENSE; 4075 bs = 1; 4076 } 4077 break; 4078 case MATAIJ_PRIVATE: 4079 new_local_type = MATSEQAIJ; 4080 bs = 1; 4081 break; 4082 case MATBAIJ_PRIVATE: 4083 new_local_type = MATSEQBAIJ; 4084 break; 4085 case MATSBAIJ_PRIVATE: 4086 new_local_type = MATSEQSBAIJ; 4087 break; 4088 default: 4089 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4090 break; 4091 } 4092 } else { /* by default, new_local_type is seqdense */ 4093 new_local_type = MATSEQDENSE; 4094 bs = 1; 4095 } 4096 4097 /* create MATIS object if needed */ 4098 if (reuse == MAT_INITIAL_MATRIX) { 4099 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4100 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4101 } else { 4102 /* it also destroys the local matrices */ 4103 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4104 } 4105 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4106 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4107 4108 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4109 4110 /* Global to local map of received indices */ 4111 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4112 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4113 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4114 4115 /* restore attributes -> type of incoming data and its size */ 4116 buf_size_idxs = 0; 4117 for (i=0;i<n_recvs;i++) { 4118 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4119 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4120 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4121 } 4122 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4123 4124 /* set preallocation */ 4125 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4126 if (!newisdense) { 4127 PetscInt *new_local_nnz=0; 4128 4129 ptr_vals = recv_buffer_vals; 4130 ptr_idxs = recv_buffer_idxs_local; 4131 if (n_recvs) { 4132 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4133 } 4134 for (i=0;i<n_recvs;i++) { 4135 PetscInt j; 4136 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4137 for (j=0;j<*(ptr_idxs+1);j++) { 4138 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4139 } 4140 } else { 4141 /* TODO */ 4142 } 4143 ptr_idxs += olengths_idxs[i]; 4144 } 4145 if (new_local_nnz) { 4146 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4147 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4148 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4149 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4150 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4151 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4152 } else { 4153 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4154 } 4155 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4156 } else { 4157 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4158 } 4159 4160 /* set values */ 4161 ptr_vals = recv_buffer_vals; 4162 ptr_idxs = recv_buffer_idxs_local; 4163 for (i=0;i<n_recvs;i++) { 4164 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4165 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4166 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4167 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4168 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4169 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4170 } else { 4171 /* TODO */ 4172 } 4173 ptr_idxs += olengths_idxs[i]; 4174 ptr_vals += olengths_vals[i]; 4175 } 4176 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4177 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4178 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4179 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4180 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4181 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4182 4183 #if 0 4184 if (!restrict_comm) { /* check */ 4185 Vec lvec,rvec; 4186 PetscReal infty_error; 4187 4188 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4189 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4190 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4191 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4192 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4193 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4194 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4195 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4196 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4197 } 4198 #endif 4199 4200 /* assemble new additional is (if any) */ 4201 if (nis) { 4202 PetscInt **temp_idxs,*count_is,j,psum; 4203 4204 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4205 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4206 ptr_idxs = recv_buffer_idxs_is; 4207 psum = 0; 4208 for (i=0;i<n_recvs;i++) { 4209 for (j=0;j<nis;j++) { 4210 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4211 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4212 psum += plen; 4213 ptr_idxs += plen+1; /* shift pointer to received data */ 4214 } 4215 } 4216 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4217 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4218 for (i=1;i<nis;i++) { 4219 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4220 } 4221 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4222 ptr_idxs = recv_buffer_idxs_is; 4223 for (i=0;i<n_recvs;i++) { 4224 for (j=0;j<nis;j++) { 4225 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4226 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4227 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4228 ptr_idxs += plen+1; /* shift pointer to received data */ 4229 } 4230 } 4231 for (i=0;i<nis;i++) { 4232 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4233 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4234 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4235 } 4236 ierr = PetscFree(count_is);CHKERRQ(ierr); 4237 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4238 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4239 } 4240 /* free workspace */ 4241 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4242 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4243 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4244 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4245 if (isdense) { 4246 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4247 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4248 } else { 4249 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4250 } 4251 if (nis) { 4252 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4253 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4254 } 4255 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4256 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4257 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4258 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4259 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4260 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4261 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4262 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4263 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4264 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4265 ierr = PetscFree(onodes);CHKERRQ(ierr); 4266 if (nis) { 4267 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4268 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4269 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4270 } 4271 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4272 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4273 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4274 for (i=0;i<nis;i++) { 4275 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4276 } 4277 *mat_n = NULL; 4278 } 4279 PetscFunctionReturn(0); 4280 } 4281 4282 /* temporary hack into ksp private data structure */ 4283 #include <petsc/private/kspimpl.h> 4284 4285 #undef __FUNCT__ 4286 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4287 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4288 { 4289 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4290 PC_IS *pcis = (PC_IS*)pc->data; 4291 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4292 MatNullSpace CoarseNullSpace=NULL; 4293 ISLocalToGlobalMapping coarse_islg; 4294 IS coarse_is,*isarray; 4295 PetscInt i,im_active=-1,active_procs=-1; 4296 PetscInt nis,nisdofs,nisneu; 4297 PC pc_temp; 4298 PCType coarse_pc_type; 4299 KSPType coarse_ksp_type; 4300 PetscBool multilevel_requested,multilevel_allowed; 4301 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4302 Mat t_coarse_mat_is; 4303 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4304 PetscMPIInt all_procs; 4305 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4306 PetscBool compute_vecs = PETSC_FALSE; 4307 PetscScalar *array; 4308 PetscErrorCode ierr; 4309 4310 PetscFunctionBegin; 4311 /* Assign global numbering to coarse dofs */ 4312 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4313 PetscInt ocoarse_size; 4314 compute_vecs = PETSC_TRUE; 4315 ocoarse_size = pcbddc->coarse_size; 4316 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4317 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4318 /* see if we can avoid some work */ 4319 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4320 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4321 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4322 PC pc; 4323 PetscBool isbddc; 4324 4325 /* temporary workaround since PCBDDC does not have a reset method so far */ 4326 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4327 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4328 if (isbddc) { 4329 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4330 } 4331 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4332 coarse_reuse = PETSC_FALSE; 4333 } else { /* we can safely reuse already computed coarse matrix */ 4334 coarse_reuse = PETSC_TRUE; 4335 } 4336 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4337 coarse_reuse = PETSC_FALSE; 4338 } 4339 /* reset any subassembling information */ 4340 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4341 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4342 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4343 coarse_reuse = PETSC_TRUE; 4344 } 4345 4346 /* count "active" (i.e. with positive local size) and "void" processes */ 4347 im_active = !!(pcis->n); 4348 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4349 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4350 void_procs = all_procs-active_procs; 4351 csin_type_simple = PETSC_TRUE; 4352 redist = PETSC_FALSE; 4353 if (pcbddc->current_level && void_procs) { 4354 csin_ml = PETSC_TRUE; 4355 ncoarse_ml = void_procs; 4356 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4357 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4358 csin_ds = PETSC_TRUE; 4359 ncoarse_ds = pcbddc->redistribute_coarse; 4360 redist = PETSC_TRUE; 4361 } else { 4362 csin_ds = PETSC_TRUE; 4363 ncoarse_ds = active_procs; 4364 redist = PETSC_TRUE; 4365 } 4366 } else { 4367 csin_ml = PETSC_FALSE; 4368 ncoarse_ml = all_procs; 4369 if (void_procs) { 4370 csin_ds = PETSC_TRUE; 4371 ncoarse_ds = void_procs; 4372 csin_type_simple = PETSC_FALSE; 4373 } else { 4374 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4375 csin_ds = PETSC_TRUE; 4376 ncoarse_ds = pcbddc->redistribute_coarse; 4377 redist = PETSC_TRUE; 4378 } else { 4379 csin_ds = PETSC_FALSE; 4380 ncoarse_ds = all_procs; 4381 } 4382 } 4383 } 4384 4385 /* 4386 test if we can go multilevel: three conditions must be satisfied: 4387 - we have not exceeded the number of levels requested 4388 - we can actually subassemble the active processes 4389 - we can find a suitable number of MPI processes where we can place the subassembled problem 4390 */ 4391 multilevel_allowed = PETSC_FALSE; 4392 multilevel_requested = PETSC_FALSE; 4393 if (pcbddc->current_level < pcbddc->max_levels) { 4394 multilevel_requested = PETSC_TRUE; 4395 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4396 multilevel_allowed = PETSC_FALSE; 4397 } else { 4398 multilevel_allowed = PETSC_TRUE; 4399 } 4400 } 4401 /* determine number of process partecipating to coarse solver */ 4402 if (multilevel_allowed) { 4403 ncoarse = ncoarse_ml; 4404 csin = csin_ml; 4405 redist = PETSC_FALSE; 4406 } else { 4407 ncoarse = ncoarse_ds; 4408 csin = csin_ds; 4409 } 4410 4411 /* creates temporary l2gmap and IS for coarse indexes */ 4412 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4413 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4414 4415 /* creates temporary MATIS object for coarse matrix */ 4416 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4417 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4418 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4419 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4420 #if 0 4421 { 4422 PetscViewer viewer; 4423 char filename[256]; 4424 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4425 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4426 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4427 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4428 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4429 } 4430 #endif 4431 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4432 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4433 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4434 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4435 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4436 4437 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4438 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4439 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4440 const PetscInt *idxs; 4441 ISLocalToGlobalMapping tmap; 4442 4443 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4444 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4445 /* allocate space for temporary storage */ 4446 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4447 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4448 /* allocate for IS array */ 4449 nisdofs = pcbddc->n_ISForDofsLocal; 4450 nisneu = !!pcbddc->NeumannBoundariesLocal; 4451 nis = nisdofs + nisneu; 4452 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4453 /* dofs splitting */ 4454 for (i=0;i<nisdofs;i++) { 4455 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4456 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4457 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4458 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4459 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4460 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4461 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4462 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4463 } 4464 /* neumann boundaries */ 4465 if (pcbddc->NeumannBoundariesLocal) { 4466 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4467 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4468 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4469 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4470 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4471 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4472 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4473 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4474 } 4475 /* free memory */ 4476 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4477 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4478 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4479 } else { 4480 nis = 0; 4481 nisdofs = 0; 4482 nisneu = 0; 4483 isarray = NULL; 4484 } 4485 /* destroy no longer needed map */ 4486 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4487 4488 /* restrict on coarse candidates (if needed) */ 4489 coarse_mat_is = NULL; 4490 if (csin) { 4491 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4492 if (redist) { 4493 PetscMPIInt rank; 4494 PetscInt spc,n_spc_p1,dest[1],destsize; 4495 4496 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4497 spc = active_procs/ncoarse; 4498 n_spc_p1 = active_procs%ncoarse; 4499 if (im_active) { 4500 destsize = 1; 4501 if (rank > n_spc_p1*(spc+1)-1) { 4502 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4503 } else { 4504 dest[0] = rank/(spc+1); 4505 } 4506 } else { 4507 destsize = 0; 4508 } 4509 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4510 } else if (csin_type_simple) { 4511 PetscMPIInt rank; 4512 PetscInt issize,isidx; 4513 4514 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4515 if (im_active) { 4516 issize = 1; 4517 isidx = (PetscInt)rank; 4518 } else { 4519 issize = 0; 4520 isidx = -1; 4521 } 4522 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4523 } else { /* get a suitable subassembling pattern from MATIS code */ 4524 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4525 } 4526 4527 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4528 if (!redist || ncoarse <= void_procs) { 4529 PetscInt ncoarse_cand,tissize,*nisindices; 4530 PetscInt *coarse_candidates; 4531 const PetscInt* tisindices; 4532 4533 /* get coarse candidates' ranks in pc communicator */ 4534 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4535 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4536 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4537 if (!coarse_candidates[i]) { 4538 coarse_candidates[ncoarse_cand++]=i; 4539 } 4540 } 4541 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4542 4543 4544 if (pcbddc->dbg_flag) { 4545 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4546 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4547 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4548 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4549 for (i=0;i<ncoarse_cand;i++) { 4550 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4551 } 4552 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4553 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4554 } 4555 /* shift the pattern on coarse candidates */ 4556 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4557 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4558 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4559 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4560 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4561 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4562 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4563 } 4564 if (pcbddc->dbg_flag) { 4565 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4566 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4567 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4568 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4569 } 4570 } 4571 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4572 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4573 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4574 } else { /* this is the last level, so use just receiving processes in subcomm */ 4575 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4576 } 4577 } else { 4578 if (pcbddc->dbg_flag) { 4579 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4580 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4581 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4582 } 4583 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4584 coarse_mat_is = t_coarse_mat_is; 4585 } 4586 4587 /* create local to global scatters for coarse problem */ 4588 if (compute_vecs) { 4589 PetscInt lrows; 4590 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4591 if (coarse_mat_is) { 4592 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4593 } else { 4594 lrows = 0; 4595 } 4596 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4597 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4598 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4599 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4600 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4601 } 4602 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4603 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4604 4605 /* set defaults for coarse KSP and PC */ 4606 if (multilevel_allowed) { 4607 coarse_ksp_type = KSPRICHARDSON; 4608 coarse_pc_type = PCBDDC; 4609 } else { 4610 coarse_ksp_type = KSPPREONLY; 4611 coarse_pc_type = PCREDUNDANT; 4612 } 4613 4614 /* print some info if requested */ 4615 if (pcbddc->dbg_flag) { 4616 if (!multilevel_allowed) { 4617 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4618 if (multilevel_requested) { 4619 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4620 } else if (pcbddc->max_levels) { 4621 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4622 } 4623 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4624 } 4625 } 4626 4627 /* create the coarse KSP object only once with defaults */ 4628 if (coarse_mat_is) { 4629 MatReuse coarse_mat_reuse; 4630 PetscViewer dbg_viewer = NULL; 4631 if (pcbddc->dbg_flag) { 4632 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4633 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4634 } 4635 if (!pcbddc->coarse_ksp) { 4636 char prefix[256],str_level[16]; 4637 size_t len; 4638 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4639 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4640 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4641 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4642 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4643 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4644 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4645 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4646 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4647 /* prefix */ 4648 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4649 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4650 if (!pcbddc->current_level) { 4651 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4652 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4653 } else { 4654 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4655 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4656 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4657 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4658 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4659 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4660 } 4661 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4662 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4663 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4664 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4665 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4666 /* allow user customization */ 4667 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4668 } 4669 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4670 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4671 if (nisdofs) { 4672 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4673 for (i=0;i<nisdofs;i++) { 4674 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4675 } 4676 } 4677 if (nisneu) { 4678 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4679 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4680 } 4681 4682 /* get some info after set from options */ 4683 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4684 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4685 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4686 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4687 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4688 isbddc = PETSC_FALSE; 4689 } 4690 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4691 if (isredundant) { 4692 KSP inner_ksp; 4693 PC inner_pc; 4694 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4695 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4696 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4697 } 4698 4699 /* assemble coarse matrix */ 4700 if (coarse_reuse) { 4701 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4702 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4703 coarse_mat_reuse = MAT_REUSE_MATRIX; 4704 } else { 4705 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4706 } 4707 if (isbddc || isnn) { 4708 if (pcbddc->coarsening_ratio > 1) { 4709 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4710 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4711 if (pcbddc->dbg_flag) { 4712 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4713 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4714 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4715 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4716 } 4717 } 4718 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4719 } else { 4720 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4721 coarse_mat = coarse_mat_is; 4722 } 4723 } else { 4724 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4725 } 4726 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4727 4728 /* propagate symmetry info of coarse matrix */ 4729 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4730 if (pc->pmat->symmetric_set) { 4731 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4732 } 4733 if (pc->pmat->hermitian_set) { 4734 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4735 } 4736 if (pc->pmat->spd_set) { 4737 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4738 } 4739 /* set operators */ 4740 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4741 if (pcbddc->dbg_flag) { 4742 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4743 } 4744 } else { /* processes non partecipating to coarse solver (if any) */ 4745 coarse_mat = 0; 4746 } 4747 ierr = PetscFree(isarray);CHKERRQ(ierr); 4748 #if 0 4749 { 4750 PetscViewer viewer; 4751 char filename[256]; 4752 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 4753 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 4754 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4755 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4756 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4757 } 4758 #endif 4759 4760 /* Compute coarse null space (special handling by BDDC only) */ 4761 #if 0 4762 if (pcbddc->NullSpace) { 4763 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4764 } 4765 #endif 4766 4767 if (pcbddc->coarse_ksp) { 4768 Vec crhs,csol; 4769 PetscBool ispreonly; 4770 4771 if (CoarseNullSpace) { 4772 if (isbddc) { 4773 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4774 } else { 4775 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4776 } 4777 } 4778 /* setup coarse ksp */ 4779 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4780 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4781 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4782 /* hack */ 4783 if (!csol) { 4784 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4785 } 4786 if (!crhs) { 4787 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4788 } 4789 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4790 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4791 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4792 KSP check_ksp; 4793 KSPType check_ksp_type; 4794 PC check_pc; 4795 Vec check_vec,coarse_vec; 4796 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4797 PetscInt its; 4798 PetscBool compute_eigs; 4799 PetscReal *eigs_r,*eigs_c; 4800 PetscInt neigs; 4801 const char *prefix; 4802 4803 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4804 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4805 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4806 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4807 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4808 if (ispreonly) { 4809 check_ksp_type = KSPPREONLY; 4810 compute_eigs = PETSC_FALSE; 4811 } else { 4812 check_ksp_type = KSPGMRES; 4813 compute_eigs = PETSC_TRUE; 4814 } 4815 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4816 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4817 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4818 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4819 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4820 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4821 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4822 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4823 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4824 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4825 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4826 /* create random vec */ 4827 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4828 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4829 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4830 if (CoarseNullSpace) { 4831 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4832 } 4833 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4834 /* solve coarse problem */ 4835 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4836 if (CoarseNullSpace) { 4837 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4838 } 4839 /* set eigenvalue estimation if preonly has not been requested */ 4840 if (compute_eigs) { 4841 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4842 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4843 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4844 lambda_max = eigs_r[neigs-1]; 4845 lambda_min = eigs_r[0]; 4846 if (pcbddc->use_coarse_estimates) { 4847 if (lambda_max>lambda_min) { 4848 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4849 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4850 } 4851 } 4852 } 4853 4854 /* check coarse problem residual error */ 4855 if (pcbddc->dbg_flag) { 4856 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4857 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4858 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4859 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4860 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4861 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4862 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4863 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4864 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4865 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4866 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4867 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4868 if (compute_eigs) { 4869 PetscReal lambda_max_s,lambda_min_s; 4870 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4871 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4872 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4873 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4874 for (i=0;i<neigs;i++) { 4875 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4876 } 4877 } 4878 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4879 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4880 } 4881 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4882 if (compute_eigs) { 4883 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4884 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4885 } 4886 } 4887 } 4888 /* print additional info */ 4889 if (pcbddc->dbg_flag) { 4890 /* waits until all processes reaches this point */ 4891 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4892 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4894 } 4895 4896 /* free memory */ 4897 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4898 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4899 PetscFunctionReturn(0); 4900 } 4901 4902 #undef __FUNCT__ 4903 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4904 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4905 { 4906 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4907 PC_IS* pcis = (PC_IS*)pc->data; 4908 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4909 IS subset,subset_mult,subset_n; 4910 PetscInt local_size,coarse_size=0; 4911 PetscInt *local_primal_indices=NULL; 4912 const PetscInt *t_local_primal_indices; 4913 PetscErrorCode ierr; 4914 4915 PetscFunctionBegin; 4916 /* Compute global number of coarse dofs */ 4917 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4918 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4919 } 4920 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4921 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4922 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4923 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4924 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4925 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4926 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4927 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4928 if (local_size != pcbddc->local_primal_size) { 4929 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4930 } 4931 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4932 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4933 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4934 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4935 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4936 4937 /* check numbering */ 4938 if (pcbddc->dbg_flag) { 4939 PetscScalar coarsesum,*array,*array2; 4940 PetscInt i; 4941 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4942 4943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4944 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4945 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4946 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4947 /* counter */ 4948 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4949 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4950 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4951 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4952 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4953 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4954 4955 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4956 for (i=0;i<pcbddc->local_primal_size;i++) { 4957 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4958 } 4959 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4960 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4961 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4962 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4963 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4964 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4965 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4966 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4967 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4968 for (i=0;i<pcis->n;i++) { 4969 if (array[i] != 0.0 && array[i] != array2[i]) { 4970 PetscInt owned = (PetscInt)(array[i]); 4971 PetscInt neigh = (PetscInt)(array2[i]); 4972 set_error = PETSC_TRUE; 4973 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4974 } 4975 } 4976 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4977 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4978 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4979 for (i=0;i<pcis->n;i++) { 4980 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4981 } 4982 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4983 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4984 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4985 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4986 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4987 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4988 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4989 PetscInt *gidxs; 4990 4991 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4992 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4993 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4994 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4996 for (i=0;i<pcbddc->local_primal_size;i++) { 4997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4998 } 4999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5000 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5001 } 5002 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5003 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5004 } 5005 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5006 /* get back data */ 5007 *coarse_size_n = coarse_size; 5008 *local_primal_indices_n = local_primal_indices; 5009 PetscFunctionReturn(0); 5010 } 5011 5012 #undef __FUNCT__ 5013 #define __FUNCT__ "PCBDDCGlobalToLocal" 5014 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5015 { 5016 IS localis_t; 5017 PetscInt i,lsize,*idxs,n; 5018 PetscScalar *vals; 5019 PetscErrorCode ierr; 5020 5021 PetscFunctionBegin; 5022 /* get indices in local ordering exploiting local to global map */ 5023 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5024 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5025 for (i=0;i<lsize;i++) vals[i] = 1.0; 5026 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5027 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5028 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5029 if (idxs) { /* multilevel guard */ 5030 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5031 } 5032 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5033 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5034 ierr = PetscFree(vals);CHKERRQ(ierr); 5035 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5036 /* now compute set in local ordering */ 5037 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5038 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5039 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5040 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5041 for (i=0,lsize=0;i<n;i++) { 5042 if (PetscRealPart(vals[i]) > 0.5) { 5043 lsize++; 5044 } 5045 } 5046 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5047 for (i=0,lsize=0;i<n;i++) { 5048 if (PetscRealPart(vals[i]) > 0.5) { 5049 idxs[lsize++] = i; 5050 } 5051 } 5052 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5053 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5054 *localis = localis_t; 5055 PetscFunctionReturn(0); 5056 } 5057 5058 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5059 #undef __FUNCT__ 5060 #define __FUNCT__ "PCBDDCMatMult_Private" 5061 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5062 { 5063 PCBDDCChange_ctx change_ctx; 5064 PetscErrorCode ierr; 5065 5066 PetscFunctionBegin; 5067 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5068 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5069 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5070 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5071 PetscFunctionReturn(0); 5072 } 5073 5074 #undef __FUNCT__ 5075 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5076 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5077 { 5078 PCBDDCChange_ctx change_ctx; 5079 PetscErrorCode ierr; 5080 5081 PetscFunctionBegin; 5082 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5083 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5084 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5085 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5086 PetscFunctionReturn(0); 5087 } 5088 5089 #undef __FUNCT__ 5090 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5091 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5092 { 5093 PC_IS *pcis=(PC_IS*)pc->data; 5094 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5095 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5096 Mat S_j; 5097 PetscInt *used_xadj,*used_adjncy; 5098 PetscBool free_used_adj; 5099 PetscErrorCode ierr; 5100 5101 PetscFunctionBegin; 5102 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5103 free_used_adj = PETSC_FALSE; 5104 if (pcbddc->sub_schurs_layers == -1) { 5105 used_xadj = NULL; 5106 used_adjncy = NULL; 5107 } else { 5108 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5109 used_xadj = pcbddc->mat_graph->xadj; 5110 used_adjncy = pcbddc->mat_graph->adjncy; 5111 } else if (pcbddc->computed_rowadj) { 5112 used_xadj = pcbddc->mat_graph->xadj; 5113 used_adjncy = pcbddc->mat_graph->adjncy; 5114 } else { 5115 PetscBool flg_row=PETSC_FALSE; 5116 const PetscInt *xadj,*adjncy; 5117 PetscInt nvtxs; 5118 5119 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5120 if (flg_row) { 5121 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5122 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5123 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5124 free_used_adj = PETSC_TRUE; 5125 } else { 5126 pcbddc->sub_schurs_layers = -1; 5127 used_xadj = NULL; 5128 used_adjncy = NULL; 5129 } 5130 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5131 } 5132 } 5133 5134 /* setup sub_schurs data */ 5135 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5136 if (!sub_schurs->use_mumps) { 5137 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5138 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5139 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5140 } else { 5141 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5142 PetscBool isseqaij; 5143 if (!pcbddc->use_vertices && reuse_solvers) { 5144 PetscInt n_vertices; 5145 5146 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5147 reuse_solvers = (PetscBool)!n_vertices; 5148 } 5149 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5150 if (!isseqaij) { 5151 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5152 if (matis->A == pcbddc->local_mat) { 5153 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5154 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5155 } else { 5156 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5157 } 5158 } 5159 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5160 } 5161 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5162 5163 /* free adjacency */ 5164 if (free_used_adj) { 5165 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5166 } 5167 PetscFunctionReturn(0); 5168 } 5169 5170 #undef __FUNCT__ 5171 #define __FUNCT__ "PCBDDCInitSubSchurs" 5172 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5173 { 5174 PC_IS *pcis=(PC_IS*)pc->data; 5175 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5176 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5177 PCBDDCGraph graph; 5178 PetscErrorCode ierr; 5179 5180 PetscFunctionBegin; 5181 /* attach interface graph for determining subsets */ 5182 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5183 IS verticesIS,verticescomm; 5184 PetscInt vsize,*idxs; 5185 5186 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5187 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5188 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5189 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5190 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5191 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5192 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5193 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5194 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5195 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5196 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5197 /* 5198 if (pcbddc->dbg_flag) { 5199 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5200 } 5201 */ 5202 } else { 5203 graph = pcbddc->mat_graph; 5204 } 5205 5206 /* sub_schurs init */ 5207 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5208 5209 /* free graph struct */ 5210 if (pcbddc->sub_schurs_rebuild) { 5211 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5212 } 5213 PetscFunctionReturn(0); 5214 } 5215