1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0" 10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 if (!pcbddc->benign_sf) { 17 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 18 if (pcbddc->benign_p0_gidx >= 0) { 19 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 20 } else { 21 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 22 } 23 } 24 if (pop) { /* use SF to get values */ 25 PetscScalar *array; 26 27 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 28 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 29 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 30 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 31 } else { /* use VecSetValue */ 32 if (pcbddc->benign_p0_gidx >= 0) { 33 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 34 } 35 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 36 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 43 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 44 { 45 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 46 PetscErrorCode ierr; 47 48 PetscFunctionBegin; 49 /* TODO: add error checking 50 - avoid nested pop (or push) calls. 51 - cannot push before pop. 52 */ 53 if (pcbddc->benign_p0_lidx < 0) { 54 PetscFunctionReturn(0); 55 } 56 if (pop) { 57 const PetscInt *cB0_cols; 58 PetscInt cB0_ncol; 59 const PetscScalar *cB0_vals; 60 61 /* extract B_0 */ 62 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 63 pcbddc->B0_ncol = cB0_ncol; 64 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 65 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 66 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 67 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 68 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 69 /* remove rows and cols from local problem */ 70 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 71 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 72 } else { /* push */ 73 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 74 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 75 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 76 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 77 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 78 } 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCAdaptiveSelection" 84 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 85 { 86 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 87 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 88 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 89 PetscBLASInt *B_iwork,*B_ifail; 90 PetscScalar *work,lwork; 91 PetscScalar *St,*S,*eigv; 92 PetscScalar *Sarray,*Starray; 93 PetscReal *eigs,thresh; 94 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 95 PetscBool allocated_S_St; 96 #if defined(PETSC_USE_COMPLEX) 97 PetscReal *rwork; 98 #endif 99 PetscErrorCode ierr; 100 101 PetscFunctionBegin; 102 if (!sub_schurs->use_mumps) { 103 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 104 } 105 106 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 107 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 108 } 109 110 if (pcbddc->dbg_flag) { 111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 112 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 114 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 115 } 116 117 if (pcbddc->dbg_flag) { 118 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 119 } 120 121 /* max size of subsets */ 122 mss = 0; 123 for (i=0;i<sub_schurs->n_subs;i++) { 124 PetscInt subset_size; 125 126 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 127 mss = PetscMax(mss,subset_size); 128 } 129 130 /* min/max and threshold */ 131 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 132 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 133 nmax = PetscMax(nmin,nmax); 134 allocated_S_St = PETSC_FALSE; 135 if (nmin) { 136 allocated_S_St = PETSC_TRUE; 137 } 138 139 /* allocate lapack workspace */ 140 cum = cum2 = 0; 141 maxneigs = 0; 142 for (i=0;i<sub_schurs->n_subs;i++) { 143 PetscInt n,subset_size; 144 145 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 146 n = PetscMin(subset_size,nmax); 147 cum += subset_size; 148 cum2 += subset_size*n; 149 maxneigs = PetscMax(maxneigs,n); 150 } 151 if (mss) { 152 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 153 PetscBLASInt B_itype = 1; 154 PetscBLASInt B_N = mss; 155 PetscReal zero = 0.0; 156 PetscReal eps = 0.0; /* dlamch? */ 157 158 B_lwork = -1; 159 S = NULL; 160 St = NULL; 161 eigs = NULL; 162 eigv = NULL; 163 B_iwork = NULL; 164 B_ifail = NULL; 165 #if defined(PETSC_USE_COMPLEX) 166 rwork = NULL; 167 #endif 168 thresh = 1.0; 169 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 170 #if defined(PETSC_USE_COMPLEX) 171 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 172 #else 173 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 174 #endif 175 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 176 ierr = PetscFPTrapPop();CHKERRQ(ierr); 177 } else { 178 /* TODO */ 179 } 180 } else { 181 lwork = 0; 182 } 183 184 nv = 0; 185 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 186 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 187 } 188 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 189 if (allocated_S_St) { 190 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 191 } 192 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 193 #if defined(PETSC_USE_COMPLEX) 194 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 195 #endif 196 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 197 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 198 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 199 nv+cum,&pcbddc->adaptive_constraints_idxs, 200 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 201 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 202 203 maxneigs = 0; 204 cum = cum2 = cumarray = 0; 205 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 206 pcbddc->adaptive_constraints_data_ptr[0] = 0; 207 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 208 const PetscInt *idxs; 209 210 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 211 for (cum=0;cum<nv;cum++) { 212 pcbddc->adaptive_constraints_n[cum] = 1; 213 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 214 pcbddc->adaptive_constraints_data[cum] = 1.0; 215 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 216 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 217 } 218 cum2 = cum; 219 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 220 } 221 222 if (mss) { /* multilevel */ 223 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 224 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 225 } 226 227 for (i=0;i<sub_schurs->n_subs;i++) { 228 229 const PetscInt *idxs; 230 PetscReal infty = PETSC_MAX_REAL; 231 PetscInt j,subset_size,eigs_start = 0; 232 PetscBLASInt B_N; 233 PetscBool same_data = PETSC_FALSE; 234 235 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 236 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 237 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 238 if (sub_schurs->is_hermitian) { 239 PetscInt j,k; 240 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 241 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 242 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 } 244 for (j=0;j<subset_size;j++) { 245 for (k=j;k<subset_size;k++) { 246 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 247 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 248 } 249 } 250 } else { 251 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 } 254 } else { 255 S = Sarray + cumarray; 256 St = Starray + cumarray; 257 } 258 259 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 260 /* see if we can save some work */ 261 if (sub_schurs->n_subs == 1) { 262 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 263 } 264 265 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 266 B_neigs = 0; 267 } else { 268 /* Threshold: this is an heuristic for edges */ 269 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 270 271 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 272 PetscBLASInt B_itype = 1; 273 PetscBLASInt B_IL, B_IU; 274 PetscReal eps = -1.0; /* dlamch? */ 275 PetscInt nmin_s; 276 277 /* ask for eigenvalues larger than thresh */ 278 if (pcbddc->dbg_flag) { 279 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 280 } 281 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 282 #if defined(PETSC_USE_COMPLEX) 283 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 284 #else 285 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 286 #endif 287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 288 if (B_ierr) { 289 if (B_ierr < 0 ) { 290 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 291 } else if (B_ierr <= B_N) { 292 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 293 } else { 294 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 295 } 296 } 297 298 if (B_neigs > nmax) { 299 if (pcbddc->dbg_flag) { 300 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 301 } 302 eigs_start = B_neigs -nmax; 303 B_neigs = nmax; 304 } 305 306 nmin_s = PetscMin(nmin,B_N); 307 if (B_neigs < nmin_s) { 308 PetscBLASInt B_neigs2; 309 310 B_IU = B_N - B_neigs; 311 B_IL = B_N - nmin_s + 1; 312 if (pcbddc->dbg_flag) { 313 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 314 } 315 if (sub_schurs->is_hermitian) { 316 PetscInt j; 317 for (j=0;j<subset_size;j++) { 318 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 319 } 320 for (j=0;j<subset_size;j++) { 321 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 322 } 323 } else { 324 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 325 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 326 } 327 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 330 #else 331 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 332 #endif 333 ierr = PetscFPTrapPop();CHKERRQ(ierr); 334 B_neigs += B_neigs2; 335 } 336 if (B_ierr) { 337 if (B_ierr < 0 ) { 338 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 339 } else if (B_ierr <= B_N) { 340 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 341 } else { 342 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 343 } 344 } 345 if (pcbddc->dbg_flag) { 346 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 347 for (j=0;j<B_neigs;j++) { 348 if (eigs[j] == 0.0) { 349 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 352 } 353 } 354 } 355 } else { 356 /* TODO */ 357 } 358 } 359 maxneigs = PetscMax(B_neigs,maxneigs); 360 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 361 if (B_neigs) { 362 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 363 364 if (pcbddc->dbg_flag > 1) { 365 PetscInt ii; 366 for (ii=0;ii<B_neigs;ii++) { 367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 368 for (j=0;j<B_N;j++) { 369 #if defined(PETSC_USE_COMPLEX) 370 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 371 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 372 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 373 #else 374 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 375 #endif 376 } 377 } 378 } 379 #if 0 380 for (j=0;j<B_neigs;j++) { 381 PetscBLASInt Blas_N,Blas_one = 1.0; 382 PetscScalar norm; 383 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 384 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 385 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 386 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 387 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 388 } else { 389 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 390 } 391 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 392 } 393 #endif 394 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 395 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 396 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 397 cum++; 398 } 399 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 400 /* shift for next computation */ 401 cumarray += subset_size*subset_size; 402 } 403 if (pcbddc->dbg_flag) { 404 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 405 } 406 407 if (mss) { 408 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 409 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 410 /* destroy matrices (junk) */ 411 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 412 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 413 } 414 if (allocated_S_St) { 415 ierr = PetscFree2(S,St);CHKERRQ(ierr); 416 } 417 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 418 #if defined(PETSC_USE_COMPLEX) 419 ierr = PetscFree(rwork);CHKERRQ(ierr); 420 #endif 421 if (pcbddc->dbg_flag) { 422 PetscInt maxneigs_r; 423 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 424 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCSetUpSolvers" 431 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 432 { 433 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *coarse_submat_vals; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 /* Setup local scatters R_to_B and (optionally) R_to_D */ 439 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 440 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 441 442 /* Setup local neumann solver ksp_R */ 443 /* PCBDDCSetUpLocalScatters should be called first! */ 444 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 445 446 /* Change global null space passed in by the user if change of basis has been requested */ 447 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 448 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 449 } 450 451 /* 452 Setup local correction and local part of coarse basis. 453 Gives back the dense local part of the coarse matrix in column major ordering 454 */ 455 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 456 457 /* Compute total number of coarse nodes and setup coarse solver */ 458 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 459 460 /* free */ 461 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 #undef __FUNCT__ 466 #define __FUNCT__ "PCBDDCResetCustomization" 467 PetscErrorCode PCBDDCResetCustomization(PC pc) 468 { 469 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 470 PetscErrorCode ierr; 471 472 PetscFunctionBegin; 473 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 474 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 475 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 476 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 477 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 478 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 479 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 480 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 481 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 482 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 483 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "PCBDDCResetTopography" 489 PetscErrorCode PCBDDCResetTopography(PC pc) 490 { 491 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 496 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 497 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 498 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 499 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "PCBDDCResetSolvers" 505 PetscErrorCode PCBDDCResetSolvers(PC pc) 506 { 507 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 508 PetscScalar *array; 509 PetscErrorCode ierr; 510 511 PetscFunctionBegin; 512 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 513 if (pcbddc->coarse_phi_B) { 514 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 515 ierr = PetscFree(array);CHKERRQ(ierr); 516 } 517 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 518 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 519 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 520 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 521 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 523 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 524 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 525 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 526 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 527 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 528 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 529 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 530 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 531 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 532 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 533 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 534 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 535 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 536 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 537 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 538 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 539 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 540 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 541 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 542 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 543 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 544 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 550 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 551 { 552 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 553 PC_IS *pcis = (PC_IS*)pc->data; 554 VecType impVecType; 555 PetscInt n_constraints,n_R,old_size,n_benign; 556 PetscErrorCode ierr; 557 558 PetscFunctionBegin; 559 if (!pcbddc->ConstraintMatrix) { 560 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 561 } 562 /* get sizes */ 563 n_benign = 0; 564 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 565 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 566 if (n_benign && n_constraints) { 567 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis"); 568 } 569 n_R = pcis->n - pcbddc->n_vertices; 570 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 571 /* local work vectors (try to avoid unneeded work)*/ 572 /* R nodes */ 573 old_size = -1; 574 if (pcbddc->vec1_R) { 575 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 576 } 577 if (n_R != old_size) { 578 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 579 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 580 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 581 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 582 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 583 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 584 } 585 /* local primal dofs */ 586 old_size = -1; 587 if (pcbddc->vec1_P) { 588 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 589 } 590 if (pcbddc->local_primal_size != old_size) { 591 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 592 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 593 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 594 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 595 } 596 /* local explicit constraints */ 597 old_size = -1; 598 if (pcbddc->vec1_C) { 599 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 600 } 601 if (n_constraints && n_constraints != old_size) { 602 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 603 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 604 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 605 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 606 } 607 PetscFunctionReturn(0); 608 } 609 610 #undef __FUNCT__ 611 #define __FUNCT__ "PCBDDCSetUpCorrection" 612 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 613 { 614 PetscErrorCode ierr; 615 /* pointers to pcis and pcbddc */ 616 PC_IS* pcis = (PC_IS*)pc->data; 617 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 618 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 619 /* submatrices of local problem */ 620 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 621 /* submatrices of benign trick */ 622 Mat B0_R,B0_V; 623 /* submatrices of local coarse problem */ 624 Mat S_VV,S_CV,S_VC,S_CC; 625 /* working matrices */ 626 Mat C_CR; 627 /* additional working stuff */ 628 PC pc_R; 629 Mat F,B0 = NULL; 630 Vec vec_R_benign,vec_R_benign_2; 631 PetscBool isLU,isCHOL,isILU; 632 633 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 634 PetscScalar *work; 635 PetscInt *idx_V_B; 636 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 637 PetscInt i,n_R,n_D,n_B; 638 PetscBool unsymmetric_check; 639 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 640 MatType impMatType; 641 /* some shortcuts to scalars */ 642 PetscScalar one=1.0,m_one=-1.0; 643 644 PetscFunctionBegin; 645 n_benign = 0; 646 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 647 n_vertices = pcbddc->n_vertices; 648 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 649 /* Set Non-overlapping dimensions */ 650 n_B = pcis->n_B; 651 n_D = pcis->n - n_B; 652 n_R = pcis->n - n_vertices; 653 654 /* Set types for local objects needed by BDDC precondtioner */ 655 impMatType = MATSEQDENSE; 656 657 /* vertices in boundary numbering */ 658 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 659 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 660 if (i != n_vertices) { 661 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 662 } 663 664 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 665 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 666 if (!n_benign) { 667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 668 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 669 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 670 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 671 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 672 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 673 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 674 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 675 } else { /* these wrappers are not needed with the benign trick */ 676 S_VV = S_CV = S_VC = S_CC = NULL; 677 } 678 679 unsymmetric_check = PETSC_FALSE; 680 /* allocate workspace */ 681 n = 0; 682 if (n_constraints) { 683 n += n_R*n_constraints; 684 } 685 if (n_vertices) { 686 n = PetscMax(2*n_R*n_vertices,n); 687 n = PetscMax((n_R+n_B)*n_vertices,n); 688 } 689 if (!pcbddc->symmetric_primal) { 690 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 691 unsymmetric_check = PETSC_TRUE; 692 } 693 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 694 695 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 696 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 697 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 698 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 699 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 700 if (isLU || isILU || isCHOL) { 701 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 702 } else if (sub_schurs->reuse_mumps) { 703 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 704 MatFactorType type; 705 706 F = reuse_mumps->F; 707 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 708 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 709 } else { 710 F = NULL; 711 } 712 713 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 714 if (n_constraints) { 715 Mat M1,M2,M3; 716 Mat auxmat; 717 IS is_aux; 718 PetscScalar *array,*array2; 719 720 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 722 723 /* Extract constraints on R nodes: C_{CR} */ 724 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 725 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 726 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 727 728 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 729 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 730 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 731 for (i=0;i<n_constraints;i++) { 732 const PetscScalar *row_cmat_values; 733 const PetscInt *row_cmat_indices; 734 PetscInt size_of_constraint,j; 735 736 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 737 for (j=0;j<size_of_constraint;j++) { 738 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 739 } 740 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 741 } 742 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 743 if (F) { 744 Mat B; 745 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 747 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 748 ierr = MatDestroy(&B);CHKERRQ(ierr); 749 } else { 750 PetscScalar *marr; 751 752 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 753 for (i=0;i<n_constraints;i++) { 754 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 755 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 756 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 757 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 758 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 759 } 760 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 761 } 762 if (!pcbddc->switch_static) { 763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 764 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 765 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 766 for (i=0;i<n_constraints;i++) { 767 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 768 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 769 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 771 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 772 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 773 } 774 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 775 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 776 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 777 } else { 778 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 779 pcbddc->local_auxmat2 = local_auxmat2_R; 780 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 781 } 782 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 783 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 784 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 785 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 786 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 787 if (isCHOL) { 788 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 789 } else { 790 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 791 } 792 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 793 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 794 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 795 ierr = MatDestroy(&M2);CHKERRQ(ierr); 796 ierr = MatDestroy(&M3);CHKERRQ(ierr); 797 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 798 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 799 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 800 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 801 ierr = MatDestroy(&M1);CHKERRQ(ierr); 802 } 803 /* Get submatrices from subdomain matrix */ 804 if (n_vertices) { 805 IS is_aux; 806 807 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 808 IS tis; 809 810 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 811 ierr = ISSort(tis);CHKERRQ(ierr); 812 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 813 ierr = ISDestroy(&tis);CHKERRQ(ierr); 814 } else { 815 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 816 } 817 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 818 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 819 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 820 if (n_benign) { 821 IS dummy; 822 PetscInt ii[2]; 823 824 ii[0] = 0; 825 ii[1] = pcbddc->B0_ncol; 826 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 827 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 828 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 829 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 830 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 831 } 832 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 833 } 834 835 /* Matrix of coarse basis functions (local) */ 836 if (pcbddc->coarse_phi_B) { 837 PetscInt on_B,on_primal,on_D=n_D; 838 if (pcbddc->coarse_phi_D) { 839 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 840 } 841 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 842 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 843 PetscScalar *marray; 844 845 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 846 ierr = PetscFree(marray);CHKERRQ(ierr); 847 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 848 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 849 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 850 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 851 } 852 } 853 854 if (!pcbddc->coarse_phi_B) { 855 PetscScalar *marray; 856 857 n = n_B*pcbddc->local_primal_size; 858 if (pcbddc->switch_static || pcbddc->dbg_flag) { 859 n += n_D*pcbddc->local_primal_size; 860 } 861 if (!pcbddc->symmetric_primal) { 862 n *= 2; 863 } 864 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 866 n = n_B*pcbddc->local_primal_size; 867 if (pcbddc->switch_static || pcbddc->dbg_flag) { 868 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 869 n += n_D*pcbddc->local_primal_size; 870 } 871 if (!pcbddc->symmetric_primal) { 872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 873 if (pcbddc->switch_static || pcbddc->dbg_flag) { 874 n = n_B*pcbddc->local_primal_size; 875 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 876 } 877 } else { 878 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 879 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 880 if (pcbddc->switch_static || pcbddc->dbg_flag) { 881 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 882 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 883 } 884 } 885 } 886 887 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 888 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 889 const PetscInt *idxs; 890 891 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 892 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 894 } 895 /* vertices */ 896 if (n_vertices) { 897 898 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 899 900 if (n_R) { 901 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 902 Mat S_V_benign; 903 PetscBLASInt B_N,B_one = 1; 904 PetscScalar *x,*y; 905 PetscBool isseqaij; 906 907 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 908 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 909 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 910 if (F) { /* TODO could be optimized for symmetric problems */ 911 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 912 } else { 913 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 914 for (i=0;i<n_vertices;i++) { 915 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 916 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 917 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 918 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 919 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 920 } 921 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 922 } 923 if (n_benign) { 924 const PetscScalar *vals; 925 const PetscInt *idxs; 926 PetscInt n; 927 928 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr); 929 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign_2);CHKERRQ(ierr); 930 ierr = VecSet(vec_R_benign_2,0.);CHKERRQ(ierr); 931 ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 932 ierr = VecSetValues(vec_R_benign_2,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 933 ierr = VecAssemblyBegin(vec_R_benign_2);CHKERRQ(ierr); 934 ierr = VecAssemblyEnd(vec_R_benign_2);CHKERRQ(ierr); 935 ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 936 ierr = KSPSolve(pcbddc->ksp_R,vec_R_benign_2,vec_R_benign);CHKERRQ(ierr); 937 ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr); 938 } 939 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 940 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 941 if (n_constraints) { 942 Mat B; 943 944 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 945 for (i=0;i<n_vertices;i++) { 946 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 947 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 948 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 949 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 952 } 953 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 954 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 955 ierr = MatDestroy(&B);CHKERRQ(ierr); 956 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 957 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 958 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 959 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 960 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 961 ierr = MatDestroy(&B);CHKERRQ(ierr); 962 } 963 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 964 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 965 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 966 } 967 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 968 if (n_benign) { 969 ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr); 970 } 971 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 972 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 973 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 974 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 975 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 976 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 977 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 978 if (!n_benign) { 979 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 980 } else { 981 const PetscScalar *vals; 982 PetscScalar val; 983 const PetscInt *idxs; 984 PetscInt n,j; 985 986 /* Use explicit loops to populate the local coarse matrix */ 987 ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr); 988 for (j=0;j<n_vertices;j++) { 989 for (i=0;i<n_vertices;i++) { 990 coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i]; 991 } 992 } 993 ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr); 994 ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr); 995 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 996 for (j=0;j<n_vertices;j++) { 997 coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j]; 998 coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j]; 999 } 1000 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1001 ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr); 1002 ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr); 1003 ierr = VecDot(vec_R_benign_2,vec_R_benign,&val);CHKERRQ(ierr); 1004 coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = val; 1005 } 1006 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1007 } else { 1008 /* TODO benign case also here */ 1009 if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented"); 1010 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1011 } 1012 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1013 /* coarse basis functions */ 1014 for (i=0;i<n_vertices;i++) { 1015 PetscScalar *y; 1016 1017 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1018 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1019 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1020 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1021 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1022 y[n_B*i+idx_V_B[i]] = 1.0; 1023 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1024 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1025 1026 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1027 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1028 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1029 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1032 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1033 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1034 } 1035 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1036 } 1037 if (n_benign) { 1038 PetscScalar *y; 1039 1040 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1041 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr); 1042 ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043 ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1044 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1045 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1046 1047 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1048 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1049 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*n_vertices);CHKERRQ(ierr); 1050 ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1051 ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1053 y[n_D*n_vertices+p0_lidx_I] = 0.0; 1054 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1055 } 1056 } 1057 /* if n_R == 0 the object is not destroyed */ 1058 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1059 } 1060 1061 if (n_constraints) { 1062 Mat B; 1063 1064 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1065 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1066 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1067 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1068 if (n_vertices) { 1069 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1070 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1071 } else { 1072 Mat S_VCt; 1073 1074 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1075 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1077 } 1078 } 1079 ierr = MatDestroy(&B);CHKERRQ(ierr); 1080 /* coarse basis functions */ 1081 for (i=0;i<n_constraints;i++) { 1082 PetscScalar *y; 1083 1084 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1085 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1086 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1087 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1090 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1091 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1092 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1093 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1094 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1095 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1096 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1097 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1098 } 1099 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1100 } 1101 } 1102 if (n_constraints) { 1103 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1104 } 1105 1106 if (n_benign) { 1107 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1108 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1109 ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr); 1110 } 1111 1112 /* compute other basis functions for non-symmetric problems */ 1113 if (!pcbddc->symmetric_primal) { 1114 1115 if (n_constraints) { 1116 Mat S_CCT,B_C; 1117 1118 /* this is a lazy thing */ 1119 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1120 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1121 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1122 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1123 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1124 if (n_vertices) { 1125 Mat B_V,S_VCT; 1126 1127 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1128 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1129 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1130 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1131 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1132 } 1133 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1134 } else { /* if there are no constraints, reset work */ 1135 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1136 } 1137 if (n_vertices && n_R) { 1138 Mat A_VRT; 1139 PetscScalar *marray; 1140 PetscBLASInt B_N,B_one = 1; 1141 1142 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1143 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1144 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1145 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1146 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1147 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1148 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1149 } 1150 1151 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1152 for (i=0;i<pcbddc->local_primal_size;i++) { 1153 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1154 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1155 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1156 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1157 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1158 } 1159 } else { 1160 for (i=0;i<pcbddc->local_primal_size;i++) { 1161 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1162 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1163 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1164 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1165 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1166 } 1167 } 1168 /* coarse basis functions */ 1169 for (i=0;i<pcbddc->local_primal_size;i++) { 1170 PetscScalar *y; 1171 1172 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1173 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1174 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1175 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177 if (i<n_vertices) { 1178 y[n_B*i+idx_V_B[i]] = 1.0; 1179 } 1180 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1181 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1182 1183 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1184 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1185 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1186 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1187 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1188 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1189 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1190 } 1191 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1192 } 1193 } 1194 /* free memory */ 1195 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1196 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1197 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1198 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1199 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1200 ierr = PetscFree(work);CHKERRQ(ierr); 1201 if (n_vertices) { 1202 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1203 } 1204 if (n_constraints) { 1205 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1206 } 1207 /* Checking coarse_sub_mat and coarse basis functios */ 1208 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1209 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1210 if (pcbddc->dbg_flag) { 1211 Mat coarse_sub_mat; 1212 Mat AUXMAT,TM1,TM2,TM3,TM4; 1213 Mat coarse_phi_D,coarse_phi_B; 1214 Mat coarse_psi_D,coarse_psi_B; 1215 Mat A_II,A_BB,A_IB,A_BI; 1216 Mat C_B,CPHI; 1217 IS is_dummy; 1218 Vec mones; 1219 MatType checkmattype=MATSEQAIJ; 1220 PetscReal real_value; 1221 1222 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1223 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1224 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1225 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1226 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1227 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1228 if (unsymmetric_check) { 1229 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1230 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1231 } 1232 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1233 1234 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1235 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1236 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1237 if (unsymmetric_check) { 1238 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1239 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1240 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1241 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1242 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1243 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1244 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1245 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1246 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1247 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1248 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1249 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1250 } else { 1251 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1252 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1253 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1254 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1255 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1256 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1257 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1258 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1259 } 1260 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1261 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1262 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1263 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1264 if (n_benign) { 1265 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1266 PetscScalar *data,*data2; 1267 1268 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1269 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1270 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1271 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1272 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1273 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1274 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1275 for (i=0;i<pcbddc->local_primal_size;i++) { 1276 data[n_vertices*pcbddc->local_primal_size+i] += data2[i]; 1277 data[i*pcbddc->local_primal_size+n_vertices] += data2[i]; 1278 } 1279 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1280 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1281 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1282 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1283 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1284 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1285 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1286 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1287 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1288 } 1289 #if 0 1290 { 1291 PetscViewer viewer; 1292 char filename[256]; 1293 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1294 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1295 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1296 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1297 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1298 } 1299 #endif 1300 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1301 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1302 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1303 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1304 1305 /* check constraints */ 1306 if (!n_benign) { 1307 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1308 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1309 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1310 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1311 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1312 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1313 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1314 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1315 if (unsymmetric_check) { 1316 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1317 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1318 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1319 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1320 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1321 } 1322 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1323 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1324 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1325 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1326 } 1327 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1328 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1329 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1330 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1331 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1332 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1333 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1334 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1335 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1336 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1337 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1338 if (unsymmetric_check) { 1339 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1340 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1341 } 1342 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1343 } 1344 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1345 /* get back data */ 1346 *coarse_submat_vals_n = coarse_submat_vals; 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1352 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1353 { 1354 Mat *work_mat; 1355 IS isrow_s,iscol_s; 1356 PetscBool rsorted,csorted; 1357 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1358 PetscErrorCode ierr; 1359 1360 PetscFunctionBegin; 1361 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1362 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1363 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1364 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1365 1366 if (!rsorted) { 1367 const PetscInt *idxs; 1368 PetscInt *idxs_sorted,i; 1369 1370 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1371 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1372 for (i=0;i<rsize;i++) { 1373 idxs_perm_r[i] = i; 1374 } 1375 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1376 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1377 for (i=0;i<rsize;i++) { 1378 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1379 } 1380 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1381 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1382 } else { 1383 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1384 isrow_s = isrow; 1385 } 1386 1387 if (!csorted) { 1388 if (isrow == iscol) { 1389 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1390 iscol_s = isrow_s; 1391 } else { 1392 const PetscInt *idxs; 1393 PetscInt *idxs_sorted,i; 1394 1395 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1396 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1397 for (i=0;i<csize;i++) { 1398 idxs_perm_c[i] = i; 1399 } 1400 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1401 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1402 for (i=0;i<csize;i++) { 1403 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1404 } 1405 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1406 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1407 } 1408 } else { 1409 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1410 iscol_s = iscol; 1411 } 1412 1413 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1414 1415 if (!rsorted || !csorted) { 1416 Mat new_mat; 1417 IS is_perm_r,is_perm_c; 1418 1419 if (!rsorted) { 1420 PetscInt *idxs_r,i; 1421 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1422 for (i=0;i<rsize;i++) { 1423 idxs_r[idxs_perm_r[i]] = i; 1424 } 1425 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1426 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1427 } else { 1428 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1429 } 1430 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1431 1432 if (!csorted) { 1433 if (isrow_s == iscol_s) { 1434 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1435 is_perm_c = is_perm_r; 1436 } else { 1437 PetscInt *idxs_c,i; 1438 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1439 for (i=0;i<csize;i++) { 1440 idxs_c[idxs_perm_c[i]] = i; 1441 } 1442 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1443 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1444 } 1445 } else { 1446 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1447 } 1448 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1449 1450 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1451 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1452 work_mat[0] = new_mat; 1453 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1454 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1455 } 1456 1457 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1458 *B = work_mat[0]; 1459 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1460 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1461 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1462 PetscFunctionReturn(0); 1463 } 1464 1465 #undef __FUNCT__ 1466 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1467 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1468 { 1469 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1470 PC_IS* pcis = (PC_IS*)pc->data; 1471 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1472 Mat new_mat; 1473 IS is_local,is_global; 1474 PetscInt local_size; 1475 PetscBool isseqaij; 1476 PetscErrorCode ierr; 1477 1478 PetscFunctionBegin; 1479 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1480 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1482 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1483 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1484 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1485 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1486 1487 /* check */ 1488 if (pcbddc->dbg_flag) { 1489 Vec x,x_change; 1490 PetscReal error; 1491 1492 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1493 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1494 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1495 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1498 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1499 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1500 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1501 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1502 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1503 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1504 ierr = VecDestroy(&x);CHKERRQ(ierr); 1505 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1506 } 1507 1508 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1509 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1510 if (isseqaij) { 1511 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1512 } else { 1513 Mat work_mat; 1514 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1515 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1516 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1517 } 1518 if (matis->A->symmetric_set) { 1519 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1520 #if !defined(PETSC_USE_COMPLEX) 1521 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1522 #endif 1523 } 1524 /* 1525 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1526 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1527 */ 1528 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1534 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1535 { 1536 PC_IS* pcis = (PC_IS*)(pc->data); 1537 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1538 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1539 PetscInt *idx_R_local=NULL; 1540 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1541 PetscInt vbs,bs; 1542 PetscBT bitmask=NULL; 1543 PetscErrorCode ierr; 1544 1545 PetscFunctionBegin; 1546 /* 1547 No need to setup local scatters if 1548 - primal space is unchanged 1549 AND 1550 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1551 AND 1552 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1553 */ 1554 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1555 PetscFunctionReturn(0); 1556 } 1557 /* destroy old objects */ 1558 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1559 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1560 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1561 /* Set Non-overlapping dimensions */ 1562 n_B = pcis->n_B; 1563 n_D = pcis->n - n_B; 1564 n_vertices = pcbddc->n_vertices; 1565 1566 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1567 1568 /* create auxiliary bitmask and allocate workspace */ 1569 if (!sub_schurs->reuse_mumps) { 1570 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1571 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1572 for (i=0;i<n_vertices;i++) { 1573 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1574 } 1575 1576 for (i=0, n_R=0; i<pcis->n; i++) { 1577 if (!PetscBTLookup(bitmask,i)) { 1578 idx_R_local[n_R++] = i; 1579 } 1580 } 1581 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1582 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1583 1584 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1585 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1586 } 1587 1588 /* Block code */ 1589 vbs = 1; 1590 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1591 if (bs>1 && !(n_vertices%bs)) { 1592 PetscBool is_blocked = PETSC_TRUE; 1593 PetscInt *vary; 1594 if (!sub_schurs->reuse_mumps) { 1595 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1596 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1597 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1598 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1599 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1600 for (i=0; i<pcis->n/bs; i++) { 1601 if (vary[i]!=0 && vary[i]!=bs) { 1602 is_blocked = PETSC_FALSE; 1603 break; 1604 } 1605 } 1606 ierr = PetscFree(vary);CHKERRQ(ierr); 1607 } else { 1608 /* Verify directly the R set */ 1609 for (i=0; i<n_R/bs; i++) { 1610 PetscInt j,node=idx_R_local[bs*i]; 1611 for (j=1; j<bs; j++) { 1612 if (node != idx_R_local[bs*i+j]-j) { 1613 is_blocked = PETSC_FALSE; 1614 break; 1615 } 1616 } 1617 } 1618 } 1619 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1620 vbs = bs; 1621 for (i=0;i<n_R/vbs;i++) { 1622 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1623 } 1624 } 1625 } 1626 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1627 if (sub_schurs->reuse_mumps) { 1628 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1629 1630 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1631 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1632 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1633 reuse_mumps->is_R = pcbddc->is_R_local; 1634 } else { 1635 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1636 } 1637 1638 /* print some info if requested */ 1639 if (pcbddc->dbg_flag) { 1640 PetscInt benign = 0; 1641 1642 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1643 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1644 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1645 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1646 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1649 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1650 } 1651 1652 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1653 if (!sub_schurs->reuse_mumps) { 1654 IS is_aux1,is_aux2; 1655 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1656 1657 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1658 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1659 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1660 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1661 for (i=0; i<n_D; i++) { 1662 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1663 } 1664 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1665 for (i=0, j=0; i<n_R; i++) { 1666 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1667 aux_array1[j++] = i; 1668 } 1669 } 1670 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1671 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1672 for (i=0, j=0; i<n_B; i++) { 1673 if (!PetscBTLookup(bitmask,is_indices[i])) { 1674 aux_array2[j++] = i; 1675 } 1676 } 1677 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1678 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1679 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1680 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1681 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1682 1683 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1684 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1685 for (i=0, j=0; i<n_R; i++) { 1686 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1687 aux_array1[j++] = i; 1688 } 1689 } 1690 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1691 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1692 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1693 } 1694 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1695 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1696 } else { 1697 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1698 IS tis; 1699 PetscInt schur_size; 1700 1701 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1702 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1703 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1704 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1705 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1706 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1707 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1708 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1709 } 1710 } 1711 PetscFunctionReturn(0); 1712 } 1713 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1717 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1718 { 1719 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1720 PC_IS *pcis = (PC_IS*)pc->data; 1721 PC pc_temp; 1722 Mat A_RR; 1723 MatReuse reuse; 1724 PetscScalar m_one = -1.0; 1725 PetscReal value; 1726 PetscInt n_D,n_R; 1727 PetscBool use_exact,use_exact_reduced,issbaij; 1728 PetscErrorCode ierr; 1729 /* prefixes stuff */ 1730 char dir_prefix[256],neu_prefix[256],str_level[16]; 1731 size_t len; 1732 1733 PetscFunctionBegin; 1734 1735 /* compute prefixes */ 1736 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1737 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1738 if (!pcbddc->current_level) { 1739 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1740 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1741 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1742 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1743 } else { 1744 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1745 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1746 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1747 len -= 15; /* remove "pc_bddc_coarse_" */ 1748 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1749 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1750 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1751 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1752 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1753 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1754 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1755 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1756 } 1757 1758 /* DIRICHLET PROBLEM */ 1759 if (dirichlet) { 1760 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1761 if (pcbddc->local_mat->symmetric_set) { 1762 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1763 } 1764 /* Matrix for Dirichlet problem is pcis->A_II */ 1765 n_D = pcis->n - pcis->n_B; 1766 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1767 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1768 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1769 /* default */ 1770 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1771 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1772 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1773 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1774 if (issbaij) { 1775 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1776 } else { 1777 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1778 } 1779 /* Allow user's customization */ 1780 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1781 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1782 } 1783 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1784 if (sub_schurs->reuse_mumps) { 1785 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1786 1787 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1788 } 1789 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1790 if (!n_D) { 1791 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1792 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1793 } 1794 /* Set Up KSP for Dirichlet problem of BDDC */ 1795 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1796 /* set ksp_D into pcis data */ 1797 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1798 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1799 pcis->ksp_D = pcbddc->ksp_D; 1800 } 1801 1802 /* NEUMANN PROBLEM */ 1803 A_RR = 0; 1804 if (neumann) { 1805 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1806 PetscInt ibs,mbs; 1807 PetscBool issbaij; 1808 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1809 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1810 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1811 if (pcbddc->ksp_R) { /* already created ksp */ 1812 PetscInt nn_R; 1813 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1814 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1815 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1816 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1817 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1818 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1819 reuse = MAT_INITIAL_MATRIX; 1820 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1821 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1822 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1823 reuse = MAT_INITIAL_MATRIX; 1824 } else { /* safe to reuse the matrix */ 1825 reuse = MAT_REUSE_MATRIX; 1826 } 1827 } 1828 /* last check */ 1829 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1830 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1831 reuse = MAT_INITIAL_MATRIX; 1832 } 1833 } else { /* first time, so we need to create the matrix */ 1834 reuse = MAT_INITIAL_MATRIX; 1835 } 1836 /* extract A_RR */ 1837 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1838 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1839 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1840 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1841 if (matis->A == pcbddc->local_mat) { 1842 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1843 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1844 } else { 1845 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1846 } 1847 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1848 if (matis->A == pcbddc->local_mat) { 1849 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1850 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1851 } else { 1852 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1853 } 1854 } 1855 if (!sub_schurs->reuse_mumps) { 1856 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1857 if (pcbddc->local_mat->symmetric_set) { 1858 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1859 } 1860 } else { 1861 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1862 1863 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1864 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1865 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1866 } 1867 if (!pcbddc->ksp_R) { /* create object if not present */ 1868 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1869 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1870 /* default */ 1871 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1872 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1873 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1874 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1875 if (issbaij) { 1876 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1877 } else { 1878 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1879 } 1880 /* Allow user's customization */ 1881 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1882 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1883 } 1884 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1885 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1886 if (!n_R) { 1887 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1888 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1889 } 1890 /* Reuse MUMPS solver if it is present */ 1891 if (sub_schurs->reuse_mumps) { 1892 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1893 1894 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1895 } 1896 /* Set Up KSP for Neumann problem of BDDC */ 1897 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1898 } 1899 /* free Neumann problem's matrix */ 1900 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1901 1902 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1903 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1904 if (pcbddc->dbg_flag) { 1905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1906 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1907 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1908 } 1909 if (dirichlet) { /* Dirichlet */ 1910 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1911 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1912 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1913 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1914 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1915 /* need to be adapted? */ 1916 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1917 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1918 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1919 /* print info */ 1920 if (pcbddc->dbg_flag) { 1921 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1922 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1923 } 1924 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1925 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1926 } 1927 } 1928 if (neumann) { /* Neumann */ 1929 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1930 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1931 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1932 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1933 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1934 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1935 /* need to be adapted? */ 1936 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1937 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1938 /* print info */ 1939 if (pcbddc->dbg_flag) { 1940 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1941 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1942 } 1943 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1944 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1945 } 1946 } 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1953 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1954 { 1955 PetscErrorCode ierr; 1956 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1957 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1958 1959 PetscFunctionBegin; 1960 if (!sub_schurs->reuse_mumps) { 1961 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1962 } 1963 if (!pcbddc->switch_static) { 1964 if (applytranspose && pcbddc->local_auxmat1) { 1965 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1966 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1967 } 1968 if (!sub_schurs->reuse_mumps) { 1969 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1970 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1971 } else { 1972 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1973 1974 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1975 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1976 } 1977 } else { 1978 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1979 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1980 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1981 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1982 if (applytranspose && pcbddc->local_auxmat1) { 1983 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1984 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1985 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1986 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1987 } 1988 } 1989 if (!sub_schurs->reuse_mumps) { 1990 if (applytranspose) { 1991 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1992 } else { 1993 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1994 } 1995 #if defined(PETSC_HAVE_MUMPS) 1996 } else { 1997 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1998 1999 if (applytranspose) { 2000 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2001 } else { 2002 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2003 } 2004 #endif 2005 } 2006 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2007 if (!pcbddc->switch_static) { 2008 if (!sub_schurs->reuse_mumps) { 2009 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2010 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2011 } else { 2012 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2013 2014 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2015 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2016 } 2017 if (!applytranspose && pcbddc->local_auxmat1) { 2018 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2019 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2020 } 2021 } else { 2022 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2023 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2024 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2026 if (!applytranspose && pcbddc->local_auxmat1) { 2027 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2028 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2029 } 2030 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2031 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2032 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2033 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2034 } 2035 PetscFunctionReturn(0); 2036 } 2037 2038 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2039 #undef __FUNCT__ 2040 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2041 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2042 { 2043 PetscErrorCode ierr; 2044 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2045 PC_IS* pcis = (PC_IS*) (pc->data); 2046 const PetscScalar zero = 0.0; 2047 2048 PetscFunctionBegin; 2049 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2050 if (applytranspose) { 2051 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2052 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2053 } else { 2054 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2055 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2056 } 2057 2058 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2059 if (pcbddc->benign_p0_lidx >= 0) { 2060 PetscScalar *array; 2061 2062 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2063 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2064 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2065 } 2066 2067 /* start communications from local primal nodes to rhs of coarse solver */ 2068 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2069 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2070 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2071 2072 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2073 /* TODO remove null space when doing multilevel */ 2074 if (pcbddc->coarse_ksp) { 2075 Vec rhs,sol; 2076 2077 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2078 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2079 if (applytranspose) { 2080 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2081 } else { 2082 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2083 } 2084 } 2085 2086 /* Local solution on R nodes */ 2087 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2088 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2089 } 2090 2091 /* communications from coarse sol to local primal nodes */ 2092 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2093 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2094 2095 /* Sum contributions from two levels */ 2096 if (applytranspose) { 2097 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2098 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2099 } else { 2100 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2101 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2102 } 2103 /* store p0 */ 2104 if (pcbddc->benign_p0_lidx >= 0) { 2105 PetscScalar *array; 2106 2107 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2108 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2109 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2110 } 2111 PetscFunctionReturn(0); 2112 } 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2116 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2117 { 2118 PetscErrorCode ierr; 2119 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2120 PetscScalar *array; 2121 Vec from,to; 2122 2123 PetscFunctionBegin; 2124 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2125 from = pcbddc->coarse_vec; 2126 to = pcbddc->vec1_P; 2127 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2128 Vec tvec; 2129 2130 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2131 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2132 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2133 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2134 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2135 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2136 } 2137 } else { /* from local to global -> put data in coarse right hand side */ 2138 from = pcbddc->vec1_P; 2139 to = pcbddc->coarse_vec; 2140 } 2141 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2147 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2148 { 2149 PetscErrorCode ierr; 2150 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2151 PetscScalar *array; 2152 Vec from,to; 2153 2154 PetscFunctionBegin; 2155 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2156 from = pcbddc->coarse_vec; 2157 to = pcbddc->vec1_P; 2158 } else { /* from local to global -> put data in coarse right hand side */ 2159 from = pcbddc->vec1_P; 2160 to = pcbddc->coarse_vec; 2161 } 2162 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2163 if (smode == SCATTER_FORWARD) { 2164 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2165 Vec tvec; 2166 2167 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2168 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2169 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2170 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2171 } 2172 } else { 2173 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2174 ierr = VecResetArray(from);CHKERRQ(ierr); 2175 } 2176 } 2177 PetscFunctionReturn(0); 2178 } 2179 2180 /* uncomment for testing purposes */ 2181 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2184 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2185 { 2186 PetscErrorCode ierr; 2187 PC_IS* pcis = (PC_IS*)(pc->data); 2188 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2189 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2190 /* one and zero */ 2191 PetscScalar one=1.0,zero=0.0; 2192 /* space to store constraints and their local indices */ 2193 PetscScalar *constraints_data; 2194 PetscInt *constraints_idxs,*constraints_idxs_B; 2195 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2196 PetscInt *constraints_n; 2197 /* iterators */ 2198 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2199 /* BLAS integers */ 2200 PetscBLASInt lwork,lierr; 2201 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2202 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2203 /* reuse */ 2204 PetscInt olocal_primal_size,olocal_primal_size_cc; 2205 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2206 /* change of basis */ 2207 PetscBool qr_needed; 2208 PetscBT change_basis,qr_needed_idx; 2209 /* auxiliary stuff */ 2210 PetscInt *nnz,*is_indices; 2211 PetscInt ncc; 2212 /* some quantities */ 2213 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2214 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2215 2216 PetscFunctionBegin; 2217 /* Destroy Mat objects computed previously */ 2218 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2219 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2220 /* save info on constraints from previous setup (if any) */ 2221 olocal_primal_size = pcbddc->local_primal_size; 2222 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2223 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2224 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2225 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2226 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2227 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2228 2229 /* print some info */ 2230 if (pcbddc->dbg_flag) { 2231 IS vertices; 2232 PetscInt nv,nedges,nfaces; 2233 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2234 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2235 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2236 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2237 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2240 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2241 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2242 } 2243 2244 if (!pcbddc->adaptive_selection) { 2245 IS ISForVertices,*ISForFaces,*ISForEdges; 2246 MatNullSpace nearnullsp; 2247 const Vec *nearnullvecs; 2248 Vec *localnearnullsp; 2249 PetscScalar *array; 2250 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2251 PetscBool nnsp_has_cnst; 2252 /* LAPACK working arrays for SVD or POD */ 2253 PetscBool skip_lapack,boolforchange; 2254 PetscScalar *work; 2255 PetscReal *singular_vals; 2256 #if defined(PETSC_USE_COMPLEX) 2257 PetscReal *rwork; 2258 #endif 2259 #if defined(PETSC_MISSING_LAPACK_GESVD) 2260 PetscScalar *temp_basis,*correlation_mat; 2261 #else 2262 PetscBLASInt dummy_int=1; 2263 PetscScalar dummy_scalar=1.; 2264 #endif 2265 2266 /* Get index sets for faces, edges and vertices from graph */ 2267 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2268 /* free unneeded index sets */ 2269 if (!pcbddc->use_vertices) { 2270 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2271 } 2272 if (!pcbddc->use_edges) { 2273 for (i=0;i<n_ISForEdges;i++) { 2274 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2275 } 2276 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2277 n_ISForEdges = 0; 2278 } 2279 if (!pcbddc->use_faces) { 2280 for (i=0;i<n_ISForFaces;i++) { 2281 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2282 } 2283 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2284 n_ISForFaces = 0; 2285 } 2286 2287 #if defined(PETSC_USE_DEBUG) 2288 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2289 Also use_change_of_basis should be consistent among processors */ 2290 if (pcbddc->NullSpace) { 2291 PetscBool tbool[2],gbool[2]; 2292 2293 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2294 pcbddc->use_change_of_basis = PETSC_TRUE; 2295 if (!ISForEdges) { 2296 pcbddc->use_change_on_faces = PETSC_TRUE; 2297 } 2298 } 2299 tbool[0] = pcbddc->use_change_of_basis; 2300 tbool[1] = pcbddc->use_change_on_faces; 2301 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2302 pcbddc->use_change_of_basis = gbool[0]; 2303 pcbddc->use_change_on_faces = gbool[1]; 2304 } 2305 #endif 2306 2307 /* check if near null space is attached to global mat */ 2308 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2309 if (nearnullsp) { 2310 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2311 /* remove any stored info */ 2312 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2313 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2314 /* store information for BDDC solver reuse */ 2315 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2316 pcbddc->onearnullspace = nearnullsp; 2317 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2318 for (i=0;i<nnsp_size;i++) { 2319 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2320 } 2321 } else { /* if near null space is not provided BDDC uses constants by default */ 2322 nnsp_size = 0; 2323 nnsp_has_cnst = PETSC_TRUE; 2324 } 2325 /* get max number of constraints on a single cc */ 2326 max_constraints = nnsp_size; 2327 if (nnsp_has_cnst) max_constraints++; 2328 2329 /* 2330 Evaluate maximum storage size needed by the procedure 2331 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2332 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2333 There can be multiple constraints per connected component 2334 */ 2335 n_vertices = 0; 2336 if (ISForVertices) { 2337 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2338 } 2339 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2340 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2341 2342 total_counts = n_ISForFaces+n_ISForEdges; 2343 total_counts *= max_constraints; 2344 total_counts += n_vertices; 2345 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2346 2347 total_counts = 0; 2348 max_size_of_constraint = 0; 2349 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2350 IS used_is; 2351 if (i<n_ISForEdges) { 2352 used_is = ISForEdges[i]; 2353 } else { 2354 used_is = ISForFaces[i-n_ISForEdges]; 2355 } 2356 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2357 total_counts += j; 2358 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2359 } 2360 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2361 2362 /* get local part of global near null space vectors */ 2363 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2364 for (k=0;k<nnsp_size;k++) { 2365 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2366 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2367 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2368 } 2369 2370 /* whether or not to skip lapack calls */ 2371 skip_lapack = PETSC_TRUE; 2372 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2373 2374 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2375 if (!skip_lapack) { 2376 PetscScalar temp_work; 2377 2378 #if defined(PETSC_MISSING_LAPACK_GESVD) 2379 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2380 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2381 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2382 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2383 #if defined(PETSC_USE_COMPLEX) 2384 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2385 #endif 2386 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2387 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2388 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2389 lwork = -1; 2390 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2391 #if !defined(PETSC_USE_COMPLEX) 2392 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2393 #else 2394 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2395 #endif 2396 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2397 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2398 #else /* on missing GESVD */ 2399 /* SVD */ 2400 PetscInt max_n,min_n; 2401 max_n = max_size_of_constraint; 2402 min_n = max_constraints; 2403 if (max_size_of_constraint < max_constraints) { 2404 min_n = max_size_of_constraint; 2405 max_n = max_constraints; 2406 } 2407 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2408 #if defined(PETSC_USE_COMPLEX) 2409 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2410 #endif 2411 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2412 lwork = -1; 2413 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2414 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2415 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2416 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2417 #if !defined(PETSC_USE_COMPLEX) 2418 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2419 #else 2420 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2421 #endif 2422 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2423 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2424 #endif /* on missing GESVD */ 2425 /* Allocate optimal workspace */ 2426 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2427 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2428 } 2429 /* Now we can loop on constraining sets */ 2430 total_counts = 0; 2431 constraints_idxs_ptr[0] = 0; 2432 constraints_data_ptr[0] = 0; 2433 /* vertices */ 2434 if (n_vertices) { 2435 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2436 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2437 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2438 for (i=0;i<n_vertices;i++) { 2439 constraints_n[total_counts] = 1; 2440 constraints_data[total_counts] = 1.0; 2441 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2442 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2443 total_counts++; 2444 } 2445 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2446 PetscBool used_vertex; 2447 for (i=0;i<n_vertices;i++) { 2448 used_vertex = PETSC_FALSE; 2449 k = 0; 2450 while (!used_vertex && k<nnsp_size) { 2451 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2452 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2453 constraints_n[total_counts] = 1; 2454 constraints_idxs[total_counts] = is_indices[i]; 2455 constraints_data[total_counts] = 1.0; 2456 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2457 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2458 total_counts++; 2459 used_vertex = PETSC_TRUE; 2460 } 2461 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2462 k++; 2463 } 2464 } 2465 } 2466 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2467 n_vertices = total_counts; 2468 } 2469 2470 /* edges and faces */ 2471 total_counts_cc = total_counts; 2472 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2473 IS used_is; 2474 PetscBool idxs_copied = PETSC_FALSE; 2475 2476 if (ncc<n_ISForEdges) { 2477 used_is = ISForEdges[ncc]; 2478 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2479 } else { 2480 used_is = ISForFaces[ncc-n_ISForEdges]; 2481 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2482 } 2483 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2484 2485 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2486 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2487 /* change of basis should not be performed on local periodic nodes */ 2488 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2489 if (nnsp_has_cnst) { 2490 PetscScalar quad_value; 2491 2492 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2493 idxs_copied = PETSC_TRUE; 2494 2495 if (!pcbddc->use_nnsp_true) { 2496 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2497 } else { 2498 quad_value = 1.0; 2499 } 2500 for (j=0;j<size_of_constraint;j++) { 2501 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2502 } 2503 temp_constraints++; 2504 total_counts++; 2505 } 2506 for (k=0;k<nnsp_size;k++) { 2507 PetscReal real_value; 2508 PetscScalar *ptr_to_data; 2509 2510 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2511 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2512 for (j=0;j<size_of_constraint;j++) { 2513 ptr_to_data[j] = array[is_indices[j]]; 2514 } 2515 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2516 /* check if array is null on the connected component */ 2517 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2518 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2519 if (real_value > 0.0) { /* keep indices and values */ 2520 temp_constraints++; 2521 total_counts++; 2522 if (!idxs_copied) { 2523 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2524 idxs_copied = PETSC_TRUE; 2525 } 2526 } 2527 } 2528 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2529 valid_constraints = temp_constraints; 2530 if (!pcbddc->use_nnsp_true && temp_constraints) { 2531 if (temp_constraints == 1) { /* just normalize the constraint */ 2532 PetscScalar norm,*ptr_to_data; 2533 2534 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2535 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2536 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2537 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2538 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2539 } else { /* perform SVD */ 2540 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2541 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2542 2543 #if defined(PETSC_MISSING_LAPACK_GESVD) 2544 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2545 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2546 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2547 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2548 from that computed using LAPACKgesvd 2549 -> This is due to a different computation of eigenvectors in LAPACKheev 2550 -> The quality of the POD-computed basis will be the same */ 2551 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2552 /* Store upper triangular part of correlation matrix */ 2553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2554 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2555 for (j=0;j<temp_constraints;j++) { 2556 for (k=0;k<j+1;k++) { 2557 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2558 } 2559 } 2560 /* compute eigenvalues and eigenvectors of correlation matrix */ 2561 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2562 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2563 #if !defined(PETSC_USE_COMPLEX) 2564 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2565 #else 2566 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2567 #endif 2568 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2569 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2570 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2571 j = 0; 2572 while (j < temp_constraints && singular_vals[j] < tol) j++; 2573 total_counts = total_counts-j; 2574 valid_constraints = temp_constraints-j; 2575 /* scale and copy POD basis into used quadrature memory */ 2576 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2577 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2578 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2579 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2580 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2581 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2582 if (j<temp_constraints) { 2583 PetscInt ii; 2584 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2585 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2586 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2587 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2588 for (k=0;k<temp_constraints-j;k++) { 2589 for (ii=0;ii<size_of_constraint;ii++) { 2590 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2591 } 2592 } 2593 } 2594 #else /* on missing GESVD */ 2595 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2596 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2597 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2598 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2599 #if !defined(PETSC_USE_COMPLEX) 2600 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2601 #else 2602 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2603 #endif 2604 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2605 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2606 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2607 k = temp_constraints; 2608 if (k > size_of_constraint) k = size_of_constraint; 2609 j = 0; 2610 while (j < k && singular_vals[k-j-1] < tol) j++; 2611 valid_constraints = k-j; 2612 total_counts = total_counts-temp_constraints+valid_constraints; 2613 #endif /* on missing GESVD */ 2614 } 2615 } 2616 /* update pointers information */ 2617 if (valid_constraints) { 2618 constraints_n[total_counts_cc] = valid_constraints; 2619 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2620 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2621 /* set change_of_basis flag */ 2622 if (boolforchange) { 2623 PetscBTSet(change_basis,total_counts_cc); 2624 } 2625 total_counts_cc++; 2626 } 2627 } 2628 /* free workspace */ 2629 if (!skip_lapack) { 2630 ierr = PetscFree(work);CHKERRQ(ierr); 2631 #if defined(PETSC_USE_COMPLEX) 2632 ierr = PetscFree(rwork);CHKERRQ(ierr); 2633 #endif 2634 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2635 #if defined(PETSC_MISSING_LAPACK_GESVD) 2636 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2637 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2638 #endif 2639 } 2640 for (k=0;k<nnsp_size;k++) { 2641 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2642 } 2643 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2644 /* free index sets of faces, edges and vertices */ 2645 for (i=0;i<n_ISForFaces;i++) { 2646 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2647 } 2648 if (n_ISForFaces) { 2649 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2650 } 2651 for (i=0;i<n_ISForEdges;i++) { 2652 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2653 } 2654 if (n_ISForEdges) { 2655 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2656 } 2657 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2658 } else { 2659 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2660 2661 total_counts = 0; 2662 n_vertices = 0; 2663 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2664 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2665 } 2666 max_constraints = 0; 2667 total_counts_cc = 0; 2668 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2669 total_counts += pcbddc->adaptive_constraints_n[i]; 2670 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2671 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2672 } 2673 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2674 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2675 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2676 constraints_data = pcbddc->adaptive_constraints_data; 2677 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2678 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2679 total_counts_cc = 0; 2680 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2681 if (pcbddc->adaptive_constraints_n[i]) { 2682 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2683 } 2684 } 2685 #if 0 2686 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2687 for (i=0;i<total_counts_cc;i++) { 2688 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2689 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2690 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2691 printf(" %d",constraints_idxs[j]); 2692 } 2693 printf("\n"); 2694 printf("number of cc: %d\n",constraints_n[i]); 2695 } 2696 for (i=0;i<n_vertices;i++) { 2697 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2698 } 2699 for (i=0;i<sub_schurs->n_subs;i++) { 2700 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2701 } 2702 #endif 2703 2704 max_size_of_constraint = 0; 2705 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2706 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2707 /* Change of basis */ 2708 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2709 if (pcbddc->use_change_of_basis) { 2710 for (i=0;i<sub_schurs->n_subs;i++) { 2711 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2712 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2713 } 2714 } 2715 } 2716 } 2717 pcbddc->local_primal_size = total_counts; 2718 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2719 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2720 2721 /* map constraints_idxs in boundary numbering */ 2722 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2723 if (i != constraints_idxs_ptr[total_counts_cc]) { 2724 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2725 } 2726 2727 /* Create constraint matrix */ 2728 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2729 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2730 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2731 2732 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2733 /* determine if a QR strategy is needed for change of basis */ 2734 qr_needed = PETSC_FALSE; 2735 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2736 total_primal_vertices=0; 2737 pcbddc->local_primal_size_cc = 0; 2738 for (i=0;i<total_counts_cc;i++) { 2739 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2740 if (size_of_constraint == 1) { 2741 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2742 pcbddc->local_primal_size_cc += 1; 2743 } else if (PetscBTLookup(change_basis,i)) { 2744 for (k=0;k<constraints_n[i];k++) { 2745 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2746 } 2747 pcbddc->local_primal_size_cc += constraints_n[i]; 2748 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2749 PetscBTSet(qr_needed_idx,i); 2750 qr_needed = PETSC_TRUE; 2751 } 2752 } else { 2753 pcbddc->local_primal_size_cc += 1; 2754 } 2755 } 2756 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2757 pcbddc->n_vertices = total_primal_vertices; 2758 /* permute indices in order to have a sorted set of vertices */ 2759 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2760 2761 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2762 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2763 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2764 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2765 2766 /* nonzero structure of constraint matrix */ 2767 /* and get reference dof for local constraints */ 2768 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2769 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2770 2771 j = total_primal_vertices; 2772 total_counts = total_primal_vertices; 2773 cum = total_primal_vertices; 2774 for (i=n_vertices;i<total_counts_cc;i++) { 2775 if (!PetscBTLookup(change_basis,i)) { 2776 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2777 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2778 cum++; 2779 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2780 for (k=0;k<constraints_n[i];k++) { 2781 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2782 nnz[j+k] = size_of_constraint; 2783 } 2784 j += constraints_n[i]; 2785 } 2786 } 2787 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2788 ierr = PetscFree(nnz);CHKERRQ(ierr); 2789 2790 /* set values in constraint matrix */ 2791 for (i=0;i<total_primal_vertices;i++) { 2792 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2793 } 2794 total_counts = total_primal_vertices; 2795 for (i=n_vertices;i<total_counts_cc;i++) { 2796 if (!PetscBTLookup(change_basis,i)) { 2797 PetscInt *cols; 2798 2799 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2800 cols = constraints_idxs+constraints_idxs_ptr[i]; 2801 for (k=0;k<constraints_n[i];k++) { 2802 PetscInt row = total_counts+k; 2803 PetscScalar *vals; 2804 2805 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2806 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2807 } 2808 total_counts += constraints_n[i]; 2809 } 2810 } 2811 /* assembling */ 2812 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2813 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2814 2815 /* 2816 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2817 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2818 */ 2819 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2820 if (pcbddc->use_change_of_basis) { 2821 /* dual and primal dofs on a single cc */ 2822 PetscInt dual_dofs,primal_dofs; 2823 /* working stuff for GEQRF */ 2824 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2825 PetscBLASInt lqr_work; 2826 /* working stuff for UNGQR */ 2827 PetscScalar *gqr_work,lgqr_work_t; 2828 PetscBLASInt lgqr_work; 2829 /* working stuff for TRTRS */ 2830 PetscScalar *trs_rhs; 2831 PetscBLASInt Blas_NRHS; 2832 /* pointers for values insertion into change of basis matrix */ 2833 PetscInt *start_rows,*start_cols; 2834 PetscScalar *start_vals; 2835 /* working stuff for values insertion */ 2836 PetscBT is_primal; 2837 PetscInt *aux_primal_numbering_B; 2838 /* matrix sizes */ 2839 PetscInt global_size,local_size; 2840 /* temporary change of basis */ 2841 Mat localChangeOfBasisMatrix; 2842 /* extra space for debugging */ 2843 PetscScalar *dbg_work; 2844 2845 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2846 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2847 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2848 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2849 /* nonzeros for local mat */ 2850 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2851 for (i=0;i<pcis->n;i++) nnz[i]=1; 2852 for (i=n_vertices;i<total_counts_cc;i++) { 2853 if (PetscBTLookup(change_basis,i)) { 2854 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2855 if (PetscBTLookup(qr_needed_idx,i)) { 2856 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2857 } else { 2858 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2859 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2860 } 2861 } 2862 } 2863 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2864 ierr = PetscFree(nnz);CHKERRQ(ierr); 2865 /* Set initial identity in the matrix */ 2866 for (i=0;i<pcis->n;i++) { 2867 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2868 } 2869 2870 if (pcbddc->dbg_flag) { 2871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2872 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2873 } 2874 2875 2876 /* Now we loop on the constraints which need a change of basis */ 2877 /* 2878 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2879 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2880 2881 Basic blocks of change of basis matrix T computed by 2882 2883 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2884 2885 | 1 0 ... 0 s_1/S | 2886 | 0 1 ... 0 s_2/S | 2887 | ... | 2888 | 0 ... 1 s_{n-1}/S | 2889 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2890 2891 with S = \sum_{i=1}^n s_i^2 2892 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2893 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2894 2895 - QR decomposition of constraints otherwise 2896 */ 2897 if (qr_needed) { 2898 /* space to store Q */ 2899 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2900 /* first we issue queries for optimal work */ 2901 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2902 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2903 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2904 lqr_work = -1; 2905 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2906 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2907 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2908 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2909 lgqr_work = -1; 2910 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2911 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2912 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2913 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2914 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2915 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2916 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2917 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2918 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2919 /* array to store scaling factors for reflectors */ 2920 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2921 /* array to store rhs and solution of triangular solver */ 2922 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2923 /* allocating workspace for check */ 2924 if (pcbddc->dbg_flag) { 2925 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2926 } 2927 } 2928 /* array to store whether a node is primal or not */ 2929 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2930 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2931 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2932 if (i != total_primal_vertices) { 2933 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2934 } 2935 for (i=0;i<total_primal_vertices;i++) { 2936 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2937 } 2938 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2939 2940 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2941 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2942 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2943 if (PetscBTLookup(change_basis,total_counts)) { 2944 /* get constraint info */ 2945 primal_dofs = constraints_n[total_counts]; 2946 dual_dofs = size_of_constraint-primal_dofs; 2947 2948 if (pcbddc->dbg_flag) { 2949 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2950 } 2951 2952 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2953 2954 /* copy quadrature constraints for change of basis check */ 2955 if (pcbddc->dbg_flag) { 2956 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2957 } 2958 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2959 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2960 2961 /* compute QR decomposition of constraints */ 2962 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2963 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2964 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2965 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2966 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2967 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2968 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2969 2970 /* explictly compute R^-T */ 2971 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2972 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2973 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2974 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2975 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2976 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2977 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2978 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2979 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2980 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2981 2982 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2983 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2984 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2985 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2986 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2987 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2988 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2989 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2990 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2991 2992 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2993 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2994 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2995 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2996 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2997 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2998 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2999 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3000 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3001 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3002 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3003 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3004 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3005 3006 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3007 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3008 /* insert cols for primal dofs */ 3009 for (j=0;j<primal_dofs;j++) { 3010 start_vals = &qr_basis[j*size_of_constraint]; 3011 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3012 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3013 } 3014 /* insert cols for dual dofs */ 3015 for (j=0,k=0;j<dual_dofs;k++) { 3016 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3017 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3018 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3019 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3020 j++; 3021 } 3022 } 3023 3024 /* check change of basis */ 3025 if (pcbddc->dbg_flag) { 3026 PetscInt ii,jj; 3027 PetscBool valid_qr=PETSC_TRUE; 3028 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3029 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3030 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3031 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3032 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3033 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3034 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3035 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3036 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3037 for (jj=0;jj<size_of_constraint;jj++) { 3038 for (ii=0;ii<primal_dofs;ii++) { 3039 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3040 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3041 } 3042 } 3043 if (!valid_qr) { 3044 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3045 for (jj=0;jj<size_of_constraint;jj++) { 3046 for (ii=0;ii<primal_dofs;ii++) { 3047 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3048 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3049 } 3050 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3051 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3052 } 3053 } 3054 } 3055 } else { 3056 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3057 } 3058 } 3059 } else { /* simple transformation block */ 3060 PetscInt row,col; 3061 PetscScalar val,norm; 3062 3063 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3064 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3065 for (j=0;j<size_of_constraint;j++) { 3066 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3067 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3068 if (!PetscBTLookup(is_primal,row_B)) { 3069 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3070 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3071 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3072 } else { 3073 for (k=0;k<size_of_constraint;k++) { 3074 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3075 if (row != col) { 3076 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3077 } else { 3078 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3079 } 3080 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3081 } 3082 } 3083 } 3084 if (pcbddc->dbg_flag) { 3085 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3086 } 3087 } 3088 } else { 3089 if (pcbddc->dbg_flag) { 3090 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3091 } 3092 } 3093 } 3094 3095 /* free workspace */ 3096 if (qr_needed) { 3097 if (pcbddc->dbg_flag) { 3098 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3099 } 3100 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3101 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3102 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3103 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3104 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3105 } 3106 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3107 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3108 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 3110 /* assembling of global change of variable */ 3111 { 3112 Mat tmat; 3113 PetscInt bs; 3114 3115 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3116 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3117 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3118 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3119 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3120 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3121 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3122 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3123 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3124 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3125 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3126 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3127 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3128 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3129 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3130 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3131 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3132 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3133 } 3134 /* check */ 3135 if (pcbddc->dbg_flag) { 3136 PetscReal error; 3137 Vec x,x_change; 3138 3139 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3140 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3141 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3142 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3143 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3144 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3145 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3146 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3147 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3148 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3149 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3150 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3151 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3152 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3153 ierr = VecDestroy(&x);CHKERRQ(ierr); 3154 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3155 } 3156 3157 /* adapt sub_schurs computed (if any) */ 3158 if (pcbddc->use_deluxe_scaling) { 3159 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3160 if (sub_schurs->S_Ej_all) { 3161 Mat S_new,tmat; 3162 IS is_all_N; 3163 3164 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3165 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3166 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3167 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3168 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3169 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3170 sub_schurs->S_Ej_all = S_new; 3171 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3172 if (sub_schurs->sum_S_Ej_all) { 3173 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3174 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3175 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3176 sub_schurs->sum_S_Ej_all = S_new; 3177 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3178 } 3179 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3180 } 3181 } 3182 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3183 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3184 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3185 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3186 } 3187 3188 /* set up change of basis context */ 3189 if (pcbddc->ChangeOfBasisMatrix) { 3190 PCBDDCChange_ctx change_ctx; 3191 3192 if (!pcbddc->new_global_mat) { 3193 PetscInt global_size,local_size; 3194 3195 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3196 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3197 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3198 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3199 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3200 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3201 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3202 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3203 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3204 } else { 3205 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3206 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3207 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3208 } 3209 if (!pcbddc->user_ChangeOfBasisMatrix) { 3210 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3211 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3212 } else { 3213 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3214 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3215 } 3216 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3217 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3218 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3219 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3220 change_ctx->pc = pc; 3221 } 3222 3223 /* add pressure dof to set of primal nodes for numbering purposes */ 3224 if (pcbddc->benign_p0_lidx >= 0) { 3225 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3226 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3227 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3228 pcbddc->local_primal_size_cc++; 3229 pcbddc->local_primal_size++; 3230 } 3231 3232 /* check if a new primal space has been introduced (also take into account benign trick) */ 3233 pcbddc->new_primal_space_local = PETSC_TRUE; 3234 if (olocal_primal_size == pcbddc->local_primal_size) { 3235 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3236 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3237 if (!pcbddc->new_primal_space_local) { 3238 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3239 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3240 } 3241 } 3242 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3243 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3244 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3245 3246 /* flush dbg viewer */ 3247 if (pcbddc->dbg_flag) { 3248 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3249 } 3250 3251 /* free workspace */ 3252 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3253 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3254 if (!pcbddc->adaptive_selection) { 3255 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3256 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3257 } else { 3258 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3259 pcbddc->adaptive_constraints_idxs_ptr, 3260 pcbddc->adaptive_constraints_data_ptr, 3261 pcbddc->adaptive_constraints_idxs, 3262 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3263 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3264 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3265 } 3266 PetscFunctionReturn(0); 3267 } 3268 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3271 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3272 { 3273 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3274 PC_IS *pcis = (PC_IS*)pc->data; 3275 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3276 PetscInt ierr,i,vertex_size,N; 3277 PetscViewer viewer=pcbddc->dbg_viewer; 3278 3279 PetscFunctionBegin; 3280 /* Reset previously computed graph */ 3281 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3282 /* Init local Graph struct */ 3283 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3284 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3285 3286 /* Check validity of the csr graph passed in by the user */ 3287 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3288 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3289 } 3290 3291 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3292 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3293 PetscInt *xadj,*adjncy; 3294 PetscInt nvtxs; 3295 PetscBool flg_row=PETSC_FALSE; 3296 3297 if (pcbddc->use_local_adj) { 3298 3299 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3300 if (flg_row) { 3301 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3302 pcbddc->computed_rowadj = PETSC_TRUE; 3303 } 3304 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3305 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3306 IS is_dummy; 3307 ISLocalToGlobalMapping l2gmap_dummy; 3308 PetscInt j,sum; 3309 PetscInt *cxadj,*cadjncy; 3310 const PetscInt *idxs; 3311 PCBDDCGraph graph; 3312 PetscBT is_on_boundary; 3313 3314 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3315 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3316 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3317 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3318 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3319 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3320 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3321 if (flg_row) { 3322 graph->xadj = xadj; 3323 graph->adjncy = adjncy; 3324 } 3325 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3326 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3327 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3328 3329 if (pcbddc->dbg_flag) { 3330 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3331 for (i=0;i<graph->ncc;i++) { 3332 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3333 } 3334 } 3335 3336 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3337 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3338 for (i=0;i<pcis->n_B;i++) { 3339 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3340 } 3341 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3342 3343 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3344 sum = 0; 3345 for (i=0;i<graph->ncc;i++) { 3346 PetscInt sizecc = 0; 3347 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3348 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3349 sizecc++; 3350 } 3351 } 3352 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3353 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3354 cxadj[graph->queue[j]] = sizecc; 3355 } 3356 } 3357 sum += sizecc*sizecc; 3358 } 3359 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3360 sum = 0; 3361 for (i=0;i<pcis->n;i++) { 3362 PetscInt temp = cxadj[i]; 3363 cxadj[i] = sum; 3364 sum += temp; 3365 } 3366 cxadj[pcis->n] = sum; 3367 for (i=0;i<graph->ncc;i++) { 3368 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3369 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3370 PetscInt k,sizecc = 0; 3371 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3372 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3373 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3374 sizecc++; 3375 } 3376 } 3377 } 3378 } 3379 } 3380 if (sum) { 3381 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3382 } else { 3383 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3384 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3385 } 3386 graph->xadj = 0; 3387 graph->adjncy = 0; 3388 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3389 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3390 } 3391 } 3392 if (pcbddc->dbg_flag) { 3393 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3394 } 3395 3396 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3397 vertex_size = 1; 3398 if (pcbddc->user_provided_isfordofs) { 3399 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3400 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3401 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3402 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3403 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3404 } 3405 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3406 pcbddc->n_ISForDofs = 0; 3407 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3408 } 3409 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3410 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3411 } else { 3412 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3413 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3414 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3415 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3416 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3417 } 3418 } 3419 } 3420 3421 /* Setup of Graph */ 3422 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3423 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3424 } 3425 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3426 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3427 } 3428 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3429 3430 /* Graph's connected components analysis */ 3431 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3432 3433 /* print some info to stdout */ 3434 if (pcbddc->dbg_flag) { 3435 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3436 } 3437 3438 /* mark topography has done */ 3439 pcbddc->recompute_topography = PETSC_FALSE; 3440 PetscFunctionReturn(0); 3441 } 3442 3443 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3444 #undef __FUNCT__ 3445 #define __FUNCT__ "PCBDDCSubsetNumbering" 3446 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3447 { 3448 PetscSF sf; 3449 PetscLayout map; 3450 const PetscInt *idxs; 3451 PetscInt *leaf_data,*root_data,*gidxs; 3452 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3453 PetscInt n_n,nlocals,start,first_index; 3454 PetscMPIInt commsize; 3455 PetscBool first_found; 3456 PetscErrorCode ierr; 3457 3458 PetscFunctionBegin; 3459 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3460 if (subset_mult) { 3461 PetscCheckSameComm(subset,1,subset_mult,2); 3462 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3463 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3464 } 3465 /* create workspace layout for computing global indices of subset */ 3466 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3467 lbounds[0] = lbounds[1] = 0; 3468 for (i=0;i<n;i++) { 3469 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3470 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3471 } 3472 lbounds[0] = -lbounds[0]; 3473 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3474 gbounds[0] = -gbounds[0]; 3475 N = gbounds[1] - gbounds[0] + 1; 3476 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3477 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3478 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3479 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3480 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3481 3482 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3483 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3484 if (subset_mult) { 3485 const PetscInt* idxs_mult; 3486 3487 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3488 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3489 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3490 } else { 3491 for (i=0;i<n;i++) leaf_data[i] = 1; 3492 } 3493 /* local size of new subset */ 3494 n_n = 0; 3495 for (i=0;i<n;i++) n_n += leaf_data[i]; 3496 3497 /* global indexes in layout */ 3498 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3499 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3500 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3501 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3502 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3503 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3504 3505 /* reduce from leaves to roots */ 3506 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3507 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3508 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3509 3510 /* count indexes in local part of layout */ 3511 nlocals = 0; 3512 first_index = -1; 3513 first_found = PETSC_FALSE; 3514 for (i=0;i<Nl;i++) { 3515 if (!first_found && root_data[i]) { 3516 first_found = PETSC_TRUE; 3517 first_index = i; 3518 } 3519 nlocals += root_data[i]; 3520 } 3521 3522 /* cumulative of number of indexes and size of subset without holes */ 3523 #if defined(PETSC_HAVE_MPI_EXSCAN) 3524 start = 0; 3525 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3526 #else 3527 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3528 start = start-nlocals; 3529 #endif 3530 3531 if (N_n) { /* compute total size of new subset if requested */ 3532 *N_n = start + nlocals; 3533 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3534 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3535 } 3536 3537 /* adapt root data with cumulative */ 3538 if (first_found) { 3539 PetscInt old_index; 3540 3541 root_data[first_index] += start; 3542 old_index = first_index; 3543 for (i=first_index+1;i<Nl;i++) { 3544 if (root_data[i]) { 3545 root_data[i] += root_data[old_index]; 3546 old_index = i; 3547 } 3548 } 3549 } 3550 3551 /* from roots to leaves */ 3552 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3553 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3554 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3555 3556 /* create new IS with global indexes without holes */ 3557 if (subset_mult) { 3558 const PetscInt* idxs_mult; 3559 PetscInt cum; 3560 3561 cum = 0; 3562 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3563 for (i=0;i<n;i++) { 3564 PetscInt j; 3565 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3566 } 3567 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3568 } else { 3569 for (i=0;i<n;i++) { 3570 gidxs[i] = leaf_data[i]-1; 3571 } 3572 } 3573 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3574 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3575 PetscFunctionReturn(0); 3576 } 3577 3578 #undef __FUNCT__ 3579 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3580 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3581 { 3582 PetscInt i,j; 3583 PetscScalar *alphas; 3584 PetscErrorCode ierr; 3585 3586 PetscFunctionBegin; 3587 /* this implements stabilized Gram-Schmidt */ 3588 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3589 for (i=0;i<n;i++) { 3590 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3591 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3592 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3593 } 3594 ierr = PetscFree(alphas);CHKERRQ(ierr); 3595 PetscFunctionReturn(0); 3596 } 3597 3598 #undef __FUNCT__ 3599 #define __FUNCT__ "MatISGetSubassemblingPattern" 3600 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3601 { 3602 IS ranks_send_to; 3603 PetscInt n_neighs,*neighs,*n_shared,**shared; 3604 PetscMPIInt size,rank,color; 3605 PetscInt *xadj,*adjncy; 3606 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3607 PetscInt i,local_size,threshold=0; 3608 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3609 PetscSubcomm subcomm; 3610 PetscErrorCode ierr; 3611 3612 PetscFunctionBegin; 3613 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3614 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3615 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3616 3617 /* Get info on mapping */ 3618 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3619 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3620 3621 /* build local CSR graph of subdomains' connectivity */ 3622 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3623 xadj[0] = 0; 3624 xadj[1] = PetscMax(n_neighs-1,0); 3625 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3626 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3627 3628 if (threshold) { 3629 PetscInt xadj_count = 0; 3630 for (i=1;i<n_neighs;i++) { 3631 if (n_shared[i] > threshold) { 3632 adjncy[xadj_count] = neighs[i]; 3633 adjncy_wgt[xadj_count] = n_shared[i]; 3634 xadj_count++; 3635 } 3636 } 3637 xadj[1] = xadj_count; 3638 } else { 3639 if (xadj[1]) { 3640 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3641 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3642 } 3643 } 3644 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3645 if (use_square) { 3646 for (i=0;i<xadj[1];i++) { 3647 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3648 } 3649 } 3650 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3651 3652 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3653 3654 /* 3655 Restrict work on active processes only. 3656 */ 3657 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3658 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3659 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3660 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3661 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3662 if (color) { 3663 ierr = PetscFree(xadj);CHKERRQ(ierr); 3664 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3665 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3666 } else { 3667 Mat subdomain_adj; 3668 IS new_ranks,new_ranks_contig; 3669 MatPartitioning partitioner; 3670 PetscInt prank,rstart=0,rend=0; 3671 PetscInt *is_indices,*oldranks; 3672 PetscBool aggregate; 3673 3674 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3675 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3676 prank = rank; 3677 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3678 /* 3679 for (i=0;i<size;i++) { 3680 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3681 } 3682 */ 3683 for (i=0;i<xadj[1];i++) { 3684 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3685 } 3686 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3687 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3688 if (aggregate) { 3689 PetscInt lrows,row,ncols,*cols; 3690 PetscMPIInt nrank; 3691 PetscScalar *vals; 3692 3693 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3694 lrows = 0; 3695 if (nrank<redprocs) { 3696 lrows = size/redprocs; 3697 if (nrank<size%redprocs) lrows++; 3698 } 3699 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3700 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3701 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3702 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3703 row = nrank; 3704 ncols = xadj[1]-xadj[0]; 3705 cols = adjncy; 3706 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3707 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3708 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3709 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3710 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 ierr = PetscFree(xadj);CHKERRQ(ierr); 3712 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3713 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3714 ierr = PetscFree(vals);CHKERRQ(ierr); 3715 } else { 3716 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3717 } 3718 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3719 3720 /* Partition */ 3721 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3722 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3723 if (use_vwgt) { 3724 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3725 v_wgt[0] = local_size; 3726 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3727 } 3728 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3729 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3730 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3731 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3732 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3733 3734 /* renumber new_ranks to avoid "holes" in new set of processors */ 3735 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3736 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3737 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3738 if (!redprocs) { 3739 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3740 } else { 3741 PetscInt idxs[1]; 3742 PetscMPIInt tag; 3743 MPI_Request *reqs; 3744 3745 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3746 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3747 for (i=rstart;i<rend;i++) { 3748 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3749 } 3750 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3751 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3752 ierr = PetscFree(reqs);CHKERRQ(ierr); 3753 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3754 } 3755 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3756 /* clean up */ 3757 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3758 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3759 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3760 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3761 } 3762 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3763 3764 /* assemble parallel IS for sends */ 3765 i = 1; 3766 if (color) i=0; 3767 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3768 /* get back IS */ 3769 *is_sends = ranks_send_to; 3770 PetscFunctionReturn(0); 3771 } 3772 3773 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3774 3775 #undef __FUNCT__ 3776 #define __FUNCT__ "MatISSubassemble" 3777 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3778 { 3779 Mat local_mat; 3780 IS is_sends_internal; 3781 PetscInt rows,cols,new_local_rows; 3782 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3783 PetscBool ismatis,isdense,newisdense,destroy_mat; 3784 ISLocalToGlobalMapping l2gmap; 3785 PetscInt* l2gmap_indices; 3786 const PetscInt* is_indices; 3787 MatType new_local_type; 3788 /* buffers */ 3789 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3790 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3791 PetscInt *recv_buffer_idxs_local; 3792 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3793 /* MPI */ 3794 MPI_Comm comm,comm_n; 3795 PetscSubcomm subcomm; 3796 PetscMPIInt n_sends,n_recvs,commsize; 3797 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3798 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3799 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3800 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3801 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3802 PetscErrorCode ierr; 3803 3804 PetscFunctionBegin; 3805 /* TODO: add missing checks */ 3806 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3807 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3808 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3809 PetscValidLogicalCollectiveInt(mat,nis,7); 3810 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3811 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3812 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3813 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3814 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3815 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3816 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3817 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3818 PetscInt mrows,mcols,mnrows,mncols; 3819 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3820 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3821 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3822 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3823 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3824 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3825 } 3826 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3827 PetscValidLogicalCollectiveInt(mat,bs,0); 3828 /* prepare IS for sending if not provided */ 3829 if (!is_sends) { 3830 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3831 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3832 } else { 3833 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3834 is_sends_internal = is_sends; 3835 } 3836 3837 /* get comm */ 3838 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3839 3840 /* compute number of sends */ 3841 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3842 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3843 3844 /* compute number of receives */ 3845 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3846 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3847 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3848 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3849 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3850 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3851 ierr = PetscFree(iflags);CHKERRQ(ierr); 3852 3853 /* restrict comm if requested */ 3854 subcomm = 0; 3855 destroy_mat = PETSC_FALSE; 3856 if (restrict_comm) { 3857 PetscMPIInt color,subcommsize; 3858 3859 color = 0; 3860 if (restrict_full) { 3861 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3862 } else { 3863 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3864 } 3865 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3866 subcommsize = commsize - subcommsize; 3867 /* check if reuse has been requested */ 3868 if (reuse == MAT_REUSE_MATRIX) { 3869 if (*mat_n) { 3870 PetscMPIInt subcommsize2; 3871 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3872 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3873 comm_n = PetscObjectComm((PetscObject)*mat_n); 3874 } else { 3875 comm_n = PETSC_COMM_SELF; 3876 } 3877 } else { /* MAT_INITIAL_MATRIX */ 3878 PetscMPIInt rank; 3879 3880 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3881 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3882 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3883 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3884 comm_n = PetscSubcommChild(subcomm); 3885 } 3886 /* flag to destroy *mat_n if not significative */ 3887 if (color) destroy_mat = PETSC_TRUE; 3888 } else { 3889 comm_n = comm; 3890 } 3891 3892 /* prepare send/receive buffers */ 3893 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3894 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3895 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3896 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3897 if (nis) { 3898 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3899 } 3900 3901 /* Get data from local matrices */ 3902 if (!isdense) { 3903 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3904 /* TODO: See below some guidelines on how to prepare the local buffers */ 3905 /* 3906 send_buffer_vals should contain the raw values of the local matrix 3907 send_buffer_idxs should contain: 3908 - MatType_PRIVATE type 3909 - PetscInt size_of_l2gmap 3910 - PetscInt global_row_indices[size_of_l2gmap] 3911 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3912 */ 3913 } else { 3914 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3915 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3916 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3917 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3918 send_buffer_idxs[1] = i; 3919 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3920 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3921 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3922 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3923 for (i=0;i<n_sends;i++) { 3924 ilengths_vals[is_indices[i]] = len*len; 3925 ilengths_idxs[is_indices[i]] = len+2; 3926 } 3927 } 3928 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3929 /* additional is (if any) */ 3930 if (nis) { 3931 PetscMPIInt psum; 3932 PetscInt j; 3933 for (j=0,psum=0;j<nis;j++) { 3934 PetscInt plen; 3935 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3936 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3937 psum += len+1; /* indices + lenght */ 3938 } 3939 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3940 for (j=0,psum=0;j<nis;j++) { 3941 PetscInt plen; 3942 const PetscInt *is_array_idxs; 3943 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3944 send_buffer_idxs_is[psum] = plen; 3945 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3946 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3947 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3948 psum += plen+1; /* indices + lenght */ 3949 } 3950 for (i=0;i<n_sends;i++) { 3951 ilengths_idxs_is[is_indices[i]] = psum; 3952 } 3953 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3954 } 3955 3956 buf_size_idxs = 0; 3957 buf_size_vals = 0; 3958 buf_size_idxs_is = 0; 3959 for (i=0;i<n_recvs;i++) { 3960 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3961 buf_size_vals += (PetscInt)olengths_vals[i]; 3962 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3963 } 3964 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3965 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3966 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3967 3968 /* get new tags for clean communications */ 3969 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3970 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3971 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3972 3973 /* allocate for requests */ 3974 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3975 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3976 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3977 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3978 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3979 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3980 3981 /* communications */ 3982 ptr_idxs = recv_buffer_idxs; 3983 ptr_vals = recv_buffer_vals; 3984 ptr_idxs_is = recv_buffer_idxs_is; 3985 for (i=0;i<n_recvs;i++) { 3986 source_dest = onodes[i]; 3987 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3988 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3989 ptr_idxs += olengths_idxs[i]; 3990 ptr_vals += olengths_vals[i]; 3991 if (nis) { 3992 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3993 ptr_idxs_is += olengths_idxs_is[i]; 3994 } 3995 } 3996 for (i=0;i<n_sends;i++) { 3997 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3998 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3999 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4000 if (nis) { 4001 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4002 } 4003 } 4004 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4005 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4006 4007 /* assemble new l2g map */ 4008 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4009 ptr_idxs = recv_buffer_idxs; 4010 new_local_rows = 0; 4011 for (i=0;i<n_recvs;i++) { 4012 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4013 ptr_idxs += olengths_idxs[i]; 4014 } 4015 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4016 ptr_idxs = recv_buffer_idxs; 4017 new_local_rows = 0; 4018 for (i=0;i<n_recvs;i++) { 4019 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4020 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4021 ptr_idxs += olengths_idxs[i]; 4022 } 4023 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4024 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4025 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4026 4027 /* infer new local matrix type from received local matrices type */ 4028 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4029 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4030 if (n_recvs) { 4031 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4032 ptr_idxs = recv_buffer_idxs; 4033 for (i=0;i<n_recvs;i++) { 4034 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4035 new_local_type_private = MATAIJ_PRIVATE; 4036 break; 4037 } 4038 ptr_idxs += olengths_idxs[i]; 4039 } 4040 switch (new_local_type_private) { 4041 case MATDENSE_PRIVATE: 4042 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4043 new_local_type = MATSEQAIJ; 4044 bs = 1; 4045 } else { /* if I receive only 1 dense matrix */ 4046 new_local_type = MATSEQDENSE; 4047 bs = 1; 4048 } 4049 break; 4050 case MATAIJ_PRIVATE: 4051 new_local_type = MATSEQAIJ; 4052 bs = 1; 4053 break; 4054 case MATBAIJ_PRIVATE: 4055 new_local_type = MATSEQBAIJ; 4056 break; 4057 case MATSBAIJ_PRIVATE: 4058 new_local_type = MATSEQSBAIJ; 4059 break; 4060 default: 4061 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4062 break; 4063 } 4064 } else { /* by default, new_local_type is seqdense */ 4065 new_local_type = MATSEQDENSE; 4066 bs = 1; 4067 } 4068 4069 /* create MATIS object if needed */ 4070 if (reuse == MAT_INITIAL_MATRIX) { 4071 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4072 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4073 } else { 4074 /* it also destroys the local matrices */ 4075 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4076 } 4077 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4078 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4079 4080 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4081 4082 /* Global to local map of received indices */ 4083 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4084 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4085 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4086 4087 /* restore attributes -> type of incoming data and its size */ 4088 buf_size_idxs = 0; 4089 for (i=0;i<n_recvs;i++) { 4090 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4091 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4092 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4093 } 4094 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4095 4096 /* set preallocation */ 4097 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4098 if (!newisdense) { 4099 PetscInt *new_local_nnz=0; 4100 4101 ptr_vals = recv_buffer_vals; 4102 ptr_idxs = recv_buffer_idxs_local; 4103 if (n_recvs) { 4104 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4105 } 4106 for (i=0;i<n_recvs;i++) { 4107 PetscInt j; 4108 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4109 for (j=0;j<*(ptr_idxs+1);j++) { 4110 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4111 } 4112 } else { 4113 /* TODO */ 4114 } 4115 ptr_idxs += olengths_idxs[i]; 4116 } 4117 if (new_local_nnz) { 4118 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4119 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4120 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4121 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4122 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4123 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4124 } else { 4125 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4126 } 4127 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4128 } else { 4129 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4130 } 4131 4132 /* set values */ 4133 ptr_vals = recv_buffer_vals; 4134 ptr_idxs = recv_buffer_idxs_local; 4135 for (i=0;i<n_recvs;i++) { 4136 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4137 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4138 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4139 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4140 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4141 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4142 } else { 4143 /* TODO */ 4144 } 4145 ptr_idxs += olengths_idxs[i]; 4146 ptr_vals += olengths_vals[i]; 4147 } 4148 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4150 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4151 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4152 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4153 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4154 4155 #if 0 4156 if (!restrict_comm) { /* check */ 4157 Vec lvec,rvec; 4158 PetscReal infty_error; 4159 4160 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4161 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4162 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4163 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4164 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4165 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4166 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4167 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4168 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4169 } 4170 #endif 4171 4172 /* assemble new additional is (if any) */ 4173 if (nis) { 4174 PetscInt **temp_idxs,*count_is,j,psum; 4175 4176 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4177 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4178 ptr_idxs = recv_buffer_idxs_is; 4179 psum = 0; 4180 for (i=0;i<n_recvs;i++) { 4181 for (j=0;j<nis;j++) { 4182 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4183 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4184 psum += plen; 4185 ptr_idxs += plen+1; /* shift pointer to received data */ 4186 } 4187 } 4188 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4189 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4190 for (i=1;i<nis;i++) { 4191 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4192 } 4193 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4194 ptr_idxs = recv_buffer_idxs_is; 4195 for (i=0;i<n_recvs;i++) { 4196 for (j=0;j<nis;j++) { 4197 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4198 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4199 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4200 ptr_idxs += plen+1; /* shift pointer to received data */ 4201 } 4202 } 4203 for (i=0;i<nis;i++) { 4204 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4205 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4206 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4207 } 4208 ierr = PetscFree(count_is);CHKERRQ(ierr); 4209 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4210 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4211 } 4212 /* free workspace */ 4213 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4214 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4215 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4216 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4217 if (isdense) { 4218 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4219 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4220 } else { 4221 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4222 } 4223 if (nis) { 4224 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4225 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4226 } 4227 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4228 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4229 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4230 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4231 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4232 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4233 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4234 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4235 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4236 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4237 ierr = PetscFree(onodes);CHKERRQ(ierr); 4238 if (nis) { 4239 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4240 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4241 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4242 } 4243 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4244 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4245 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4246 for (i=0;i<nis;i++) { 4247 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4248 } 4249 *mat_n = NULL; 4250 } 4251 PetscFunctionReturn(0); 4252 } 4253 4254 /* temporary hack into ksp private data structure */ 4255 #include <petsc/private/kspimpl.h> 4256 4257 #undef __FUNCT__ 4258 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4259 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4260 { 4261 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4262 PC_IS *pcis = (PC_IS*)pc->data; 4263 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4264 MatNullSpace CoarseNullSpace=NULL; 4265 ISLocalToGlobalMapping coarse_islg; 4266 IS coarse_is,*isarray; 4267 PetscInt i,im_active=-1,active_procs=-1; 4268 PetscInt nis,nisdofs,nisneu; 4269 PC pc_temp; 4270 PCType coarse_pc_type; 4271 KSPType coarse_ksp_type; 4272 PetscBool multilevel_requested,multilevel_allowed; 4273 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4274 Mat t_coarse_mat_is; 4275 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4276 PetscMPIInt all_procs; 4277 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4278 PetscBool compute_vecs = PETSC_FALSE; 4279 PetscScalar *array; 4280 PetscErrorCode ierr; 4281 4282 PetscFunctionBegin; 4283 /* Assign global numbering to coarse dofs */ 4284 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4285 PetscInt ocoarse_size; 4286 compute_vecs = PETSC_TRUE; 4287 ocoarse_size = pcbddc->coarse_size; 4288 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4289 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4290 /* see if we can avoid some work */ 4291 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4292 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4293 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4294 PC pc; 4295 PetscBool isbddc; 4296 4297 /* temporary workaround since PCBDDC does not have a reset method so far */ 4298 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4299 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4300 if (isbddc) { 4301 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4302 } 4303 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4304 coarse_reuse = PETSC_FALSE; 4305 } else { /* we can safely reuse already computed coarse matrix */ 4306 coarse_reuse = PETSC_TRUE; 4307 } 4308 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4309 coarse_reuse = PETSC_FALSE; 4310 } 4311 /* reset any subassembling information */ 4312 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4313 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4314 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4315 coarse_reuse = PETSC_TRUE; 4316 } 4317 4318 /* count "active" (i.e. with positive local size) and "void" processes */ 4319 im_active = !!(pcis->n); 4320 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4321 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4322 void_procs = all_procs-active_procs; 4323 csin_type_simple = PETSC_TRUE; 4324 redist = PETSC_FALSE; 4325 if (pcbddc->current_level && void_procs) { 4326 csin_ml = PETSC_TRUE; 4327 ncoarse_ml = void_procs; 4328 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4329 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4330 csin_ds = PETSC_TRUE; 4331 ncoarse_ds = pcbddc->redistribute_coarse; 4332 redist = PETSC_TRUE; 4333 } else { 4334 csin_ds = PETSC_TRUE; 4335 ncoarse_ds = active_procs; 4336 redist = PETSC_TRUE; 4337 } 4338 } else { 4339 csin_ml = PETSC_FALSE; 4340 ncoarse_ml = all_procs; 4341 if (void_procs) { 4342 csin_ds = PETSC_TRUE; 4343 ncoarse_ds = void_procs; 4344 csin_type_simple = PETSC_FALSE; 4345 } else { 4346 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4347 csin_ds = PETSC_TRUE; 4348 ncoarse_ds = pcbddc->redistribute_coarse; 4349 redist = PETSC_TRUE; 4350 } else { 4351 csin_ds = PETSC_FALSE; 4352 ncoarse_ds = all_procs; 4353 } 4354 } 4355 } 4356 4357 /* 4358 test if we can go multilevel: three conditions must be satisfied: 4359 - we have not exceeded the number of levels requested 4360 - we can actually subassemble the active processes 4361 - we can find a suitable number of MPI processes where we can place the subassembled problem 4362 */ 4363 multilevel_allowed = PETSC_FALSE; 4364 multilevel_requested = PETSC_FALSE; 4365 if (pcbddc->current_level < pcbddc->max_levels) { 4366 multilevel_requested = PETSC_TRUE; 4367 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4368 multilevel_allowed = PETSC_FALSE; 4369 } else { 4370 multilevel_allowed = PETSC_TRUE; 4371 } 4372 } 4373 /* determine number of process partecipating to coarse solver */ 4374 if (multilevel_allowed) { 4375 ncoarse = ncoarse_ml; 4376 csin = csin_ml; 4377 redist = PETSC_FALSE; 4378 } else { 4379 ncoarse = ncoarse_ds; 4380 csin = csin_ds; 4381 } 4382 4383 /* creates temporary l2gmap and IS for coarse indexes */ 4384 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4385 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4386 4387 /* creates temporary MATIS object for coarse matrix */ 4388 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4389 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4390 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4391 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4392 #if 1 4393 { 4394 PetscViewer viewer; 4395 char filename[256]; 4396 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4397 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4398 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4399 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4400 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4401 } 4402 #endif 4403 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4404 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4405 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4406 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4407 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4408 4409 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4410 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4411 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4412 const PetscInt *idxs; 4413 ISLocalToGlobalMapping tmap; 4414 4415 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4416 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4417 /* allocate space for temporary storage */ 4418 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4419 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4420 /* allocate for IS array */ 4421 nisdofs = pcbddc->n_ISForDofsLocal; 4422 nisneu = !!pcbddc->NeumannBoundariesLocal; 4423 nis = nisdofs + nisneu; 4424 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4425 /* dofs splitting */ 4426 for (i=0;i<nisdofs;i++) { 4427 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4428 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4429 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4430 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4431 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4432 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4433 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4434 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4435 } 4436 /* neumann boundaries */ 4437 if (pcbddc->NeumannBoundariesLocal) { 4438 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4439 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4440 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4441 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4442 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4443 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4444 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4445 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4446 } 4447 /* free memory */ 4448 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4449 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4450 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4451 } else { 4452 nis = 0; 4453 nisdofs = 0; 4454 nisneu = 0; 4455 isarray = NULL; 4456 } 4457 /* destroy no longer needed map */ 4458 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4459 4460 /* restrict on coarse candidates (if needed) */ 4461 coarse_mat_is = NULL; 4462 if (csin) { 4463 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4464 if (redist) { 4465 PetscMPIInt rank; 4466 PetscInt spc,n_spc_p1,dest[1],destsize; 4467 4468 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4469 spc = active_procs/ncoarse; 4470 n_spc_p1 = active_procs%ncoarse; 4471 if (im_active) { 4472 destsize = 1; 4473 if (rank > n_spc_p1*(spc+1)-1) { 4474 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4475 } else { 4476 dest[0] = rank/(spc+1); 4477 } 4478 } else { 4479 destsize = 0; 4480 } 4481 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4482 } else if (csin_type_simple) { 4483 PetscMPIInt rank; 4484 PetscInt issize,isidx; 4485 4486 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4487 if (im_active) { 4488 issize = 1; 4489 isidx = (PetscInt)rank; 4490 } else { 4491 issize = 0; 4492 isidx = -1; 4493 } 4494 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4495 } else { /* get a suitable subassembling pattern from MATIS code */ 4496 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4497 } 4498 4499 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4500 if (!redist || ncoarse <= void_procs) { 4501 PetscInt ncoarse_cand,tissize,*nisindices; 4502 PetscInt *coarse_candidates; 4503 const PetscInt* tisindices; 4504 4505 /* get coarse candidates' ranks in pc communicator */ 4506 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4507 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4508 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4509 if (!coarse_candidates[i]) { 4510 coarse_candidates[ncoarse_cand++]=i; 4511 } 4512 } 4513 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4514 4515 4516 if (pcbddc->dbg_flag) { 4517 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4519 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4520 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4521 for (i=0;i<ncoarse_cand;i++) { 4522 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4523 } 4524 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4525 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4526 } 4527 /* shift the pattern on coarse candidates */ 4528 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4529 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4530 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4531 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4532 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4533 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4534 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4535 } 4536 if (pcbddc->dbg_flag) { 4537 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4538 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4539 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4540 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4541 } 4542 } 4543 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4544 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4545 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4546 } else { /* this is the last level, so use just receiving processes in subcomm */ 4547 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4548 } 4549 } else { 4550 if (pcbddc->dbg_flag) { 4551 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4552 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4553 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4554 } 4555 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4556 coarse_mat_is = t_coarse_mat_is; 4557 } 4558 4559 /* create local to global scatters for coarse problem */ 4560 if (compute_vecs) { 4561 PetscInt lrows; 4562 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4563 if (coarse_mat_is) { 4564 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4565 } else { 4566 lrows = 0; 4567 } 4568 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4569 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4570 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4571 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4572 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4573 } 4574 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4575 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4576 4577 /* set defaults for coarse KSP and PC */ 4578 if (multilevel_allowed) { 4579 coarse_ksp_type = KSPRICHARDSON; 4580 coarse_pc_type = PCBDDC; 4581 } else { 4582 coarse_ksp_type = KSPPREONLY; 4583 coarse_pc_type = PCREDUNDANT; 4584 } 4585 4586 /* print some info if requested */ 4587 if (pcbddc->dbg_flag) { 4588 if (!multilevel_allowed) { 4589 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4590 if (multilevel_requested) { 4591 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4592 } else if (pcbddc->max_levels) { 4593 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4594 } 4595 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4596 } 4597 } 4598 4599 /* create the coarse KSP object only once with defaults */ 4600 if (coarse_mat_is) { 4601 MatReuse coarse_mat_reuse; 4602 PetscViewer dbg_viewer = NULL; 4603 if (pcbddc->dbg_flag) { 4604 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4605 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4606 } 4607 if (!pcbddc->coarse_ksp) { 4608 char prefix[256],str_level[16]; 4609 size_t len; 4610 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4611 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4612 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4613 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4614 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4615 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4616 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4617 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4618 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4619 /* prefix */ 4620 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4621 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4622 if (!pcbddc->current_level) { 4623 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4624 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4625 } else { 4626 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4627 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4628 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4629 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4630 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4631 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4632 } 4633 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4634 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4635 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4636 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4637 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4638 /* allow user customization */ 4639 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4640 } 4641 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4642 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4643 if (nisdofs) { 4644 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4645 for (i=0;i<nisdofs;i++) { 4646 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4647 } 4648 } 4649 if (nisneu) { 4650 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4651 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4652 } 4653 4654 /* get some info after set from options */ 4655 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4656 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4657 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4658 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4659 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4660 isbddc = PETSC_FALSE; 4661 } 4662 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4663 if (isredundant) { 4664 KSP inner_ksp; 4665 PC inner_pc; 4666 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4667 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4668 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4669 } 4670 4671 /* assemble coarse matrix */ 4672 if (coarse_reuse) { 4673 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4674 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4675 coarse_mat_reuse = MAT_REUSE_MATRIX; 4676 } else { 4677 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4678 } 4679 if (isbddc || isnn) { 4680 if (pcbddc->coarsening_ratio > 1) { 4681 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4682 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4683 if (pcbddc->dbg_flag) { 4684 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4685 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4686 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4687 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4688 } 4689 } 4690 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4691 } else { 4692 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4693 coarse_mat = coarse_mat_is; 4694 } 4695 } else { 4696 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4697 } 4698 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4699 4700 /* propagate symmetry info of coarse matrix */ 4701 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4702 if (pc->pmat->symmetric_set) { 4703 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4704 } 4705 if (pc->pmat->hermitian_set) { 4706 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4707 } 4708 if (pc->pmat->spd_set) { 4709 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4710 } 4711 /* set operators */ 4712 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4713 if (pcbddc->dbg_flag) { 4714 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4715 } 4716 } else { /* processes non partecipating to coarse solver (if any) */ 4717 coarse_mat = 0; 4718 } 4719 ierr = PetscFree(isarray);CHKERRQ(ierr); 4720 #if 1 4721 { 4722 PetscViewer viewer; 4723 char filename[256]; 4724 sprintf(filename,"coarse_mat.m"); 4725 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4726 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4727 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4728 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4729 } 4730 #endif 4731 4732 /* Compute coarse null space (special handling by BDDC only) */ 4733 #if 0 4734 if (pcbddc->NullSpace) { 4735 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4736 } 4737 #endif 4738 4739 if (pcbddc->coarse_ksp) { 4740 Vec crhs,csol; 4741 PetscBool ispreonly; 4742 4743 if (CoarseNullSpace) { 4744 if (isbddc) { 4745 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4746 } else { 4747 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4748 } 4749 } 4750 /* setup coarse ksp */ 4751 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4752 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4753 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4754 /* hack */ 4755 if (!csol) { 4756 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4757 } 4758 if (!crhs) { 4759 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4760 } 4761 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4762 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4763 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4764 KSP check_ksp; 4765 KSPType check_ksp_type; 4766 PC check_pc; 4767 Vec check_vec,coarse_vec; 4768 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4769 PetscInt its; 4770 PetscBool compute_eigs; 4771 PetscReal *eigs_r,*eigs_c; 4772 PetscInt neigs; 4773 const char *prefix; 4774 4775 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4776 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4777 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4778 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4779 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4780 if (ispreonly) { 4781 check_ksp_type = KSPPREONLY; 4782 compute_eigs = PETSC_FALSE; 4783 } else { 4784 check_ksp_type = KSPGMRES; 4785 compute_eigs = PETSC_TRUE; 4786 } 4787 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4788 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4789 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4790 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4791 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4792 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4793 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4794 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4795 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4796 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4797 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4798 /* create random vec */ 4799 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4800 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4801 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4802 if (CoarseNullSpace) { 4803 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4804 } 4805 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4806 /* solve coarse problem */ 4807 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4808 if (CoarseNullSpace) { 4809 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4810 } 4811 /* set eigenvalue estimation if preonly has not been requested */ 4812 if (compute_eigs) { 4813 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4814 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4815 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4816 lambda_max = eigs_r[neigs-1]; 4817 lambda_min = eigs_r[0]; 4818 if (pcbddc->use_coarse_estimates) { 4819 if (lambda_max>lambda_min) { 4820 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4821 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4822 } 4823 } 4824 } 4825 4826 /* check coarse problem residual error */ 4827 if (pcbddc->dbg_flag) { 4828 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4829 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4830 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4831 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4832 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4833 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4834 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4835 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4836 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4837 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4838 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4839 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4840 if (compute_eigs) { 4841 PetscReal lambda_max_s,lambda_min_s; 4842 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4843 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4844 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4845 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4846 for (i=0;i<neigs;i++) { 4847 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4848 } 4849 } 4850 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4851 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4852 } 4853 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4854 if (compute_eigs) { 4855 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4856 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4857 } 4858 } 4859 } 4860 /* print additional info */ 4861 if (pcbddc->dbg_flag) { 4862 /* waits until all processes reaches this point */ 4863 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4864 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4865 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4866 } 4867 4868 /* free memory */ 4869 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4870 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4871 PetscFunctionReturn(0); 4872 } 4873 4874 #undef __FUNCT__ 4875 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4876 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4877 { 4878 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4879 PC_IS* pcis = (PC_IS*)pc->data; 4880 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4881 IS subset,subset_mult,subset_n; 4882 PetscInt local_size,coarse_size=0; 4883 PetscInt *local_primal_indices=NULL; 4884 const PetscInt *t_local_primal_indices; 4885 PetscErrorCode ierr; 4886 4887 PetscFunctionBegin; 4888 /* Compute global number of coarse dofs */ 4889 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4890 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4891 } 4892 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4893 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4894 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4895 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4896 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4897 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4898 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4899 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4900 if (local_size != pcbddc->local_primal_size) { 4901 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4902 } 4903 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4904 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4905 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4906 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4907 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4908 4909 /* check numbering */ 4910 if (pcbddc->dbg_flag) { 4911 PetscScalar coarsesum,*array,*array2; 4912 PetscInt i; 4913 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4914 4915 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4916 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4917 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4918 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4919 /* counter */ 4920 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4921 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4922 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4923 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4924 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4925 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4926 4927 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4928 for (i=0;i<pcbddc->local_primal_size;i++) { 4929 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4930 } 4931 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4932 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4933 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4934 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4935 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4936 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4937 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4938 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4939 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4940 for (i=0;i<pcis->n;i++) { 4941 if (array[i] != 0.0 && array[i] != array2[i]) { 4942 PetscInt owned = (PetscInt)(array[i]); 4943 PetscInt neigh = (PetscInt)(array2[i]); 4944 set_error = PETSC_TRUE; 4945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4946 } 4947 } 4948 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4949 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4951 for (i=0;i<pcis->n;i++) { 4952 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4953 } 4954 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4955 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4956 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4957 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4958 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4959 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4960 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4961 PetscInt *gidxs; 4962 4963 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4964 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4966 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4967 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4968 for (i=0;i<pcbddc->local_primal_size;i++) { 4969 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4970 } 4971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4972 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4973 } 4974 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4975 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4976 } 4977 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4978 /* get back data */ 4979 *coarse_size_n = coarse_size; 4980 *local_primal_indices_n = local_primal_indices; 4981 PetscFunctionReturn(0); 4982 } 4983 4984 #undef __FUNCT__ 4985 #define __FUNCT__ "PCBDDCGlobalToLocal" 4986 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4987 { 4988 IS localis_t; 4989 PetscInt i,lsize,*idxs,n; 4990 PetscScalar *vals; 4991 PetscErrorCode ierr; 4992 4993 PetscFunctionBegin; 4994 /* get indices in local ordering exploiting local to global map */ 4995 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4996 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4997 for (i=0;i<lsize;i++) vals[i] = 1.0; 4998 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4999 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5000 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5001 if (idxs) { /* multilevel guard */ 5002 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5003 } 5004 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5005 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5006 ierr = PetscFree(vals);CHKERRQ(ierr); 5007 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5008 /* now compute set in local ordering */ 5009 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5010 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5011 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5012 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5013 for (i=0,lsize=0;i<n;i++) { 5014 if (PetscRealPart(vals[i]) > 0.5) { 5015 lsize++; 5016 } 5017 } 5018 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5019 for (i=0,lsize=0;i<n;i++) { 5020 if (PetscRealPart(vals[i]) > 0.5) { 5021 idxs[lsize++] = i; 5022 } 5023 } 5024 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5025 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5026 *localis = localis_t; 5027 PetscFunctionReturn(0); 5028 } 5029 5030 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5031 #undef __FUNCT__ 5032 #define __FUNCT__ "PCBDDCMatMult_Private" 5033 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5034 { 5035 PCBDDCChange_ctx change_ctx; 5036 PC_IS *pcis; 5037 PC_BDDC *pcbddc; 5038 PetscErrorCode ierr; 5039 5040 PetscFunctionBegin; 5041 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5042 pcis = (PC_IS*)(change_ctx->pc->data); 5043 pcbddc = (PC_BDDC*)(change_ctx->pc->data); 5044 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5045 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5046 if (pcbddc->benign_saddle_point) { 5047 Mat_IS* matis = (Mat_IS*)(change_ctx->original_mat->data); 5048 ierr = VecScatterBegin(matis->rctx,change_ctx->work[1],matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5049 ierr = VecScatterEnd(matis->rctx,change_ctx->work[1],matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5050 5051 if (pcbddc->benign_p0_lidx >=0) { 5052 Mat B0; 5053 Vec dummy_vec; 5054 PetscScalar *array; 5055 PetscInt ii[2]; 5056 5057 ii[0] = 0; 5058 ii[1] = pcbddc->B0_ncol; 5059 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 5060 ierr = MatCreateVecs(B0,NULL,&dummy_vec);CHKERRQ(ierr); 5061 ierr = MatMult(B0,matis->x,dummy_vec);CHKERRQ(ierr); 5062 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5063 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] benign? %1.4e\n",PetscGlobalRank,array[0]);CHKERRQ(ierr); 5064 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5065 ierr = MatDestroy(&B0);CHKERRQ(ierr); 5066 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5067 } 5068 } 5069 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5070 PetscFunctionReturn(0); 5071 } 5072 5073 #undef __FUNCT__ 5074 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5075 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5076 { 5077 PCBDDCChange_ctx change_ctx; 5078 PetscErrorCode ierr; 5079 5080 PetscFunctionBegin; 5081 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5082 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5083 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5084 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5085 PetscFunctionReturn(0); 5086 } 5087 5088 #undef __FUNCT__ 5089 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5090 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5091 { 5092 PC_IS *pcis=(PC_IS*)pc->data; 5093 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5094 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5095 Mat S_j; 5096 PetscInt *used_xadj,*used_adjncy; 5097 PetscBool free_used_adj; 5098 PetscErrorCode ierr; 5099 5100 PetscFunctionBegin; 5101 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5102 free_used_adj = PETSC_FALSE; 5103 if (pcbddc->sub_schurs_layers == -1) { 5104 used_xadj = NULL; 5105 used_adjncy = NULL; 5106 } else { 5107 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5108 used_xadj = pcbddc->mat_graph->xadj; 5109 used_adjncy = pcbddc->mat_graph->adjncy; 5110 } else if (pcbddc->computed_rowadj) { 5111 used_xadj = pcbddc->mat_graph->xadj; 5112 used_adjncy = pcbddc->mat_graph->adjncy; 5113 } else { 5114 PetscBool flg_row=PETSC_FALSE; 5115 const PetscInt *xadj,*adjncy; 5116 PetscInt nvtxs; 5117 5118 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5119 if (flg_row) { 5120 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5121 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5122 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5123 free_used_adj = PETSC_TRUE; 5124 } else { 5125 pcbddc->sub_schurs_layers = -1; 5126 used_xadj = NULL; 5127 used_adjncy = NULL; 5128 } 5129 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5130 } 5131 } 5132 5133 /* setup sub_schurs data */ 5134 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5135 if (!sub_schurs->use_mumps) { 5136 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5137 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5138 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5139 } else { 5140 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5141 PetscBool isseqaij; 5142 if (!pcbddc->use_vertices && reuse_solvers) { 5143 PetscInt n_vertices; 5144 5145 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5146 reuse_solvers = (PetscBool)!n_vertices; 5147 } 5148 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5149 if (!isseqaij) { 5150 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5151 if (matis->A == pcbddc->local_mat) { 5152 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5153 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5154 } else { 5155 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5156 } 5157 } 5158 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5159 } 5160 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5161 5162 /* free adjacency */ 5163 if (free_used_adj) { 5164 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5165 } 5166 PetscFunctionReturn(0); 5167 } 5168 5169 #undef __FUNCT__ 5170 #define __FUNCT__ "PCBDDCInitSubSchurs" 5171 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5172 { 5173 PC_IS *pcis=(PC_IS*)pc->data; 5174 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5175 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5176 PCBDDCGraph graph; 5177 PetscErrorCode ierr; 5178 5179 PetscFunctionBegin; 5180 /* attach interface graph for determining subsets */ 5181 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5182 IS verticesIS,verticescomm; 5183 PetscInt vsize,*idxs; 5184 5185 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5186 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5187 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5188 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5189 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5190 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5191 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5192 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5193 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5194 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5195 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5196 /* 5197 if (pcbddc->dbg_flag) { 5198 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5199 } 5200 */ 5201 } else { 5202 graph = pcbddc->mat_graph; 5203 } 5204 5205 /* sub_schurs init */ 5206 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5207 5208 /* free graph struct */ 5209 if (pcbddc->sub_schurs_rebuild) { 5210 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5211 } 5212 PetscFunctionReturn(0); 5213 } 5214