1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0" 10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 if (!pcbddc->benign_sf) { 17 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 18 if (pcbddc->benign_p0_gidx >= 0) { 19 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 20 } else { 21 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 22 } 23 } 24 if (pop) { /* use SF to get values */ 25 PetscScalar *array; 26 27 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 28 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 29 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 30 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 31 } else { /* use VecSetValue */ 32 if (pcbddc->benign_p0_gidx >= 0) { 33 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 34 } 35 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 36 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 43 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 44 { 45 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 46 PetscErrorCode ierr; 47 48 PetscFunctionBegin; 49 /* TODO: add error checking 50 - avoid nested pop (or push) calls. 51 - cannot push before pop. 52 */ 53 if (pcbddc->benign_p0_lidx < 0) { 54 PetscFunctionReturn(0); 55 } 56 if (pop) { 57 const PetscInt *cB0_cols; 58 PetscInt cB0_ncol; 59 const PetscScalar *cB0_vals; 60 61 /* extract B_0 */ 62 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 63 pcbddc->B0_ncol = cB0_ncol; 64 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 65 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 66 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 67 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 68 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 69 /* remove rows and cols from local problem */ 70 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 71 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 72 } else { /* push */ 73 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 74 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 75 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 76 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 77 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 78 } 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCAdaptiveSelection" 84 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 85 { 86 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 87 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 88 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 89 PetscBLASInt *B_iwork,*B_ifail; 90 PetscScalar *work,lwork; 91 PetscScalar *St,*S,*eigv; 92 PetscScalar *Sarray,*Starray; 93 PetscReal *eigs,thresh; 94 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 95 PetscBool allocated_S_St; 96 #if defined(PETSC_USE_COMPLEX) 97 PetscReal *rwork; 98 #endif 99 PetscErrorCode ierr; 100 101 PetscFunctionBegin; 102 if (!sub_schurs->use_mumps) { 103 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 104 } 105 106 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 107 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 108 } 109 110 if (pcbddc->dbg_flag) { 111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 112 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 114 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 115 } 116 117 if (pcbddc->dbg_flag) { 118 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 119 } 120 121 /* max size of subsets */ 122 mss = 0; 123 for (i=0;i<sub_schurs->n_subs;i++) { 124 PetscInt subset_size; 125 126 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 127 mss = PetscMax(mss,subset_size); 128 } 129 130 /* min/max and threshold */ 131 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 132 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 133 nmax = PetscMax(nmin,nmax); 134 allocated_S_St = PETSC_FALSE; 135 if (nmin) { 136 allocated_S_St = PETSC_TRUE; 137 } 138 139 /* allocate lapack workspace */ 140 cum = cum2 = 0; 141 maxneigs = 0; 142 for (i=0;i<sub_schurs->n_subs;i++) { 143 PetscInt n,subset_size; 144 145 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 146 n = PetscMin(subset_size,nmax); 147 cum += subset_size; 148 cum2 += subset_size*n; 149 maxneigs = PetscMax(maxneigs,n); 150 } 151 if (mss) { 152 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 153 PetscBLASInt B_itype = 1; 154 PetscBLASInt B_N = mss; 155 PetscReal zero = 0.0; 156 PetscReal eps = 0.0; /* dlamch? */ 157 158 B_lwork = -1; 159 S = NULL; 160 St = NULL; 161 eigs = NULL; 162 eigv = NULL; 163 B_iwork = NULL; 164 B_ifail = NULL; 165 #if defined(PETSC_USE_COMPLEX) 166 rwork = NULL; 167 #endif 168 thresh = 1.0; 169 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 170 #if defined(PETSC_USE_COMPLEX) 171 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 172 #else 173 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 174 #endif 175 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 176 ierr = PetscFPTrapPop();CHKERRQ(ierr); 177 } else { 178 /* TODO */ 179 } 180 } else { 181 lwork = 0; 182 } 183 184 nv = 0; 185 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 186 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 187 } 188 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 189 if (allocated_S_St) { 190 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 191 } 192 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 193 #if defined(PETSC_USE_COMPLEX) 194 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 195 #endif 196 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 197 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 198 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 199 nv+cum,&pcbddc->adaptive_constraints_idxs, 200 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 201 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 202 203 maxneigs = 0; 204 cum = cum2 = cumarray = 0; 205 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 206 pcbddc->adaptive_constraints_data_ptr[0] = 0; 207 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 208 const PetscInt *idxs; 209 210 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 211 for (cum=0;cum<nv;cum++) { 212 pcbddc->adaptive_constraints_n[cum] = 1; 213 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 214 pcbddc->adaptive_constraints_data[cum] = 1.0; 215 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 216 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 217 } 218 cum2 = cum; 219 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 220 } 221 222 if (mss) { /* multilevel */ 223 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 224 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 225 } 226 227 for (i=0;i<sub_schurs->n_subs;i++) { 228 229 const PetscInt *idxs; 230 PetscReal infty = PETSC_MAX_REAL; 231 PetscInt j,subset_size,eigs_start = 0; 232 PetscBLASInt B_N; 233 PetscBool same_data = PETSC_FALSE; 234 235 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 236 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 237 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 238 if (sub_schurs->is_hermitian) { 239 PetscInt j,k; 240 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 241 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 242 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 } 244 for (j=0;j<subset_size;j++) { 245 for (k=j;k<subset_size;k++) { 246 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 247 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 248 } 249 } 250 } else { 251 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 } 254 } else { 255 S = Sarray + cumarray; 256 St = Starray + cumarray; 257 } 258 259 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 260 /* see if we can save some work */ 261 if (sub_schurs->n_subs == 1) { 262 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 263 } 264 265 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 266 B_neigs = 0; 267 } else { 268 /* Threshold: this is an heuristic for edges */ 269 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 270 271 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 272 PetscBLASInt B_itype = 1; 273 PetscBLASInt B_IL, B_IU; 274 PetscReal eps = -1.0; /* dlamch? */ 275 PetscInt nmin_s; 276 277 /* ask for eigenvalues larger than thresh */ 278 if (pcbddc->dbg_flag) { 279 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 280 } 281 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 282 #if defined(PETSC_USE_COMPLEX) 283 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 284 #else 285 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 286 #endif 287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 288 if (B_ierr) { 289 if (B_ierr < 0 ) { 290 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 291 } else if (B_ierr <= B_N) { 292 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 293 } else { 294 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 295 } 296 } 297 298 if (B_neigs > nmax) { 299 if (pcbddc->dbg_flag) { 300 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 301 } 302 eigs_start = B_neigs -nmax; 303 B_neigs = nmax; 304 } 305 306 nmin_s = PetscMin(nmin,B_N); 307 if (B_neigs < nmin_s) { 308 PetscBLASInt B_neigs2; 309 310 B_IU = B_N - B_neigs; 311 B_IL = B_N - nmin_s + 1; 312 if (pcbddc->dbg_flag) { 313 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 314 } 315 if (sub_schurs->is_hermitian) { 316 PetscInt j; 317 for (j=0;j<subset_size;j++) { 318 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 319 } 320 for (j=0;j<subset_size;j++) { 321 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 322 } 323 } else { 324 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 325 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 326 } 327 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 330 #else 331 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 332 #endif 333 ierr = PetscFPTrapPop();CHKERRQ(ierr); 334 B_neigs += B_neigs2; 335 } 336 if (B_ierr) { 337 if (B_ierr < 0 ) { 338 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 339 } else if (B_ierr <= B_N) { 340 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 341 } else { 342 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 343 } 344 } 345 if (pcbddc->dbg_flag) { 346 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 347 for (j=0;j<B_neigs;j++) { 348 if (eigs[j] == 0.0) { 349 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 352 } 353 } 354 } 355 } else { 356 /* TODO */ 357 } 358 } 359 maxneigs = PetscMax(B_neigs,maxneigs); 360 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 361 if (B_neigs) { 362 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 363 364 if (pcbddc->dbg_flag > 1) { 365 PetscInt ii; 366 for (ii=0;ii<B_neigs;ii++) { 367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 368 for (j=0;j<B_N;j++) { 369 #if defined(PETSC_USE_COMPLEX) 370 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 371 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 372 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 373 #else 374 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 375 #endif 376 } 377 } 378 } 379 #if 0 380 for (j=0;j<B_neigs;j++) { 381 PetscBLASInt Blas_N,Blas_one = 1.0; 382 PetscScalar norm; 383 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 384 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 385 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 386 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 387 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 388 } else { 389 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 390 } 391 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 392 } 393 #endif 394 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 395 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 396 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 397 cum++; 398 } 399 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 400 /* shift for next computation */ 401 cumarray += subset_size*subset_size; 402 } 403 if (pcbddc->dbg_flag) { 404 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 405 } 406 407 if (mss) { 408 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 409 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 410 /* destroy matrices (junk) */ 411 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 412 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 413 } 414 if (allocated_S_St) { 415 ierr = PetscFree2(S,St);CHKERRQ(ierr); 416 } 417 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 418 #if defined(PETSC_USE_COMPLEX) 419 ierr = PetscFree(rwork);CHKERRQ(ierr); 420 #endif 421 if (pcbddc->dbg_flag) { 422 PetscInt maxneigs_r; 423 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 424 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCSetUpSolvers" 431 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 432 { 433 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *coarse_submat_vals; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 /* Setup local scatters R_to_B and (optionally) R_to_D */ 439 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 440 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 441 442 /* Setup local neumann solver ksp_R */ 443 /* PCBDDCSetUpLocalScatters should be called first! */ 444 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 445 446 /* Change global null space passed in by the user if change of basis has been requested */ 447 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 448 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 449 } 450 451 /* 452 Setup local correction and local part of coarse basis. 453 Gives back the dense local part of the coarse matrix in column major ordering 454 */ 455 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 456 457 /* Compute total number of coarse nodes and setup coarse solver */ 458 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 459 460 /* free */ 461 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 #undef __FUNCT__ 466 #define __FUNCT__ "PCBDDCResetCustomization" 467 PetscErrorCode PCBDDCResetCustomization(PC pc) 468 { 469 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 470 PetscErrorCode ierr; 471 472 PetscFunctionBegin; 473 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 474 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 475 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 476 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 477 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 478 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 479 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 480 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 481 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 482 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 483 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "PCBDDCResetTopography" 489 PetscErrorCode PCBDDCResetTopography(PC pc) 490 { 491 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 496 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 497 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 498 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 499 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "PCBDDCResetSolvers" 505 PetscErrorCode PCBDDCResetSolvers(PC pc) 506 { 507 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 508 PetscScalar *array; 509 PetscErrorCode ierr; 510 511 PetscFunctionBegin; 512 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 513 if (pcbddc->coarse_phi_B) { 514 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 515 ierr = PetscFree(array);CHKERRQ(ierr); 516 } 517 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 518 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 519 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 520 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 521 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 523 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 524 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 525 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 526 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 527 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 528 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 529 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 530 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 531 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 532 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 533 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 534 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 535 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 536 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 537 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 538 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 539 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 540 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 541 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 542 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 543 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 544 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 550 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 551 { 552 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 553 PC_IS *pcis = (PC_IS*)pc->data; 554 VecType impVecType; 555 PetscInt n_constraints,n_R,old_size,n_benign; 556 PetscErrorCode ierr; 557 558 PetscFunctionBegin; 559 if (!pcbddc->ConstraintMatrix) { 560 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 561 } 562 /* get sizes */ 563 n_benign = 0; 564 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 565 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 566 if (n_benign && n_constraints) { 567 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis"); 568 } 569 n_R = pcis->n - pcbddc->n_vertices; 570 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 571 /* local work vectors (try to avoid unneeded work)*/ 572 /* R nodes */ 573 old_size = -1; 574 if (pcbddc->vec1_R) { 575 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 576 } 577 if (n_R != old_size) { 578 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 579 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 580 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 581 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 582 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 583 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 584 } 585 /* local primal dofs */ 586 old_size = -1; 587 if (pcbddc->vec1_P) { 588 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 589 } 590 if (pcbddc->local_primal_size != old_size) { 591 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 592 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 593 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 594 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 595 } 596 /* local explicit constraints */ 597 old_size = -1; 598 if (pcbddc->vec1_C) { 599 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 600 } 601 if (n_constraints && n_constraints != old_size) { 602 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 603 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 604 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 605 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 606 } 607 PetscFunctionReturn(0); 608 } 609 610 #undef __FUNCT__ 611 #define __FUNCT__ "PCBDDCSetUpCorrection" 612 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 613 { 614 PetscErrorCode ierr; 615 /* pointers to pcis and pcbddc */ 616 PC_IS* pcis = (PC_IS*)pc->data; 617 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 618 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 619 /* submatrices of local problem */ 620 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 621 /* submatrices of benign trick */ 622 Mat B0_R,B0_V; 623 /* submatrices of local coarse problem */ 624 Mat S_VV,S_CV,S_VC,S_CC; 625 /* working matrices */ 626 Mat C_CR; 627 /* additional working stuff */ 628 PC pc_R; 629 Mat F,B0 = NULL; 630 Vec vec_R_benign,vec_R_benign_2; 631 PetscBool isLU,isCHOL,isILU; 632 633 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 634 PetscScalar *work; 635 PetscInt *idx_V_B; 636 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 637 PetscInt i,n_R,n_D,n_B; 638 PetscBool unsymmetric_check; 639 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 640 MatType impMatType; 641 /* some shortcuts to scalars */ 642 PetscScalar one=1.0,m_one=-1.0; 643 644 PetscFunctionBegin; 645 n_benign = 0; 646 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 647 n_vertices = pcbddc->n_vertices; 648 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 649 /* Set Non-overlapping dimensions */ 650 n_B = pcis->n_B; 651 n_D = pcis->n - n_B; 652 n_R = pcis->n - n_vertices; 653 654 /* Set types for local objects needed by BDDC precondtioner */ 655 impMatType = MATSEQDENSE; 656 657 /* vertices in boundary numbering */ 658 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 659 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 660 if (i != n_vertices) { 661 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 662 } 663 664 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 665 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 666 if (!n_benign) { 667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 668 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 669 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 670 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 671 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 672 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 673 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 674 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 675 } else { /* these wrappers are not needed with the benign trick */ 676 S_VV = S_CV = S_VC = S_CC = NULL; 677 } 678 679 unsymmetric_check = PETSC_FALSE; 680 /* allocate workspace */ 681 n = 0; 682 if (n_constraints) { 683 n += n_R*n_constraints; 684 } 685 if (n_vertices) { 686 n = PetscMax(2*n_R*n_vertices,n); 687 n = PetscMax((n_R+n_B)*n_vertices,n); 688 } 689 if (!pcbddc->symmetric_primal) { 690 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 691 unsymmetric_check = PETSC_TRUE; 692 } 693 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 694 695 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 696 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 697 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 698 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 699 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 700 if (isLU || isILU || isCHOL) { 701 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 702 } else if (sub_schurs->reuse_mumps) { 703 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 704 MatFactorType type; 705 706 F = reuse_mumps->F; 707 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 708 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 709 } else { 710 F = NULL; 711 } 712 713 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 714 if (n_constraints) { 715 Mat M1,M2,M3; 716 Mat auxmat; 717 IS is_aux; 718 PetscScalar *array,*array2; 719 720 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 722 723 /* Extract constraints on R nodes: C_{CR} */ 724 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 725 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 726 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 727 728 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 729 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 730 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 731 for (i=0;i<n_constraints;i++) { 732 const PetscScalar *row_cmat_values; 733 const PetscInt *row_cmat_indices; 734 PetscInt size_of_constraint,j; 735 736 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 737 for (j=0;j<size_of_constraint;j++) { 738 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 739 } 740 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 741 } 742 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 743 if (F) { 744 Mat B; 745 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 747 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 748 ierr = MatDestroy(&B);CHKERRQ(ierr); 749 } else { 750 PetscScalar *marr; 751 752 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 753 for (i=0;i<n_constraints;i++) { 754 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 755 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 756 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 757 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 758 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 759 } 760 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 761 } 762 if (!pcbddc->switch_static) { 763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 764 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 765 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 766 for (i=0;i<n_constraints;i++) { 767 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 768 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 769 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 771 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 772 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 773 } 774 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 775 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 776 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 777 } else { 778 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 779 pcbddc->local_auxmat2 = local_auxmat2_R; 780 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 781 } 782 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 783 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 784 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 785 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 786 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 787 if (isCHOL) { 788 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 789 } else { 790 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 791 } 792 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 793 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 794 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 795 ierr = MatDestroy(&M2);CHKERRQ(ierr); 796 ierr = MatDestroy(&M3);CHKERRQ(ierr); 797 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 798 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 799 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 800 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 801 ierr = MatDestroy(&M1);CHKERRQ(ierr); 802 } 803 /* Get submatrices from subdomain matrix */ 804 if (n_vertices) { 805 IS is_aux; 806 807 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 808 IS tis; 809 810 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 811 ierr = ISSort(tis);CHKERRQ(ierr); 812 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 813 ierr = ISDestroy(&tis);CHKERRQ(ierr); 814 } else { 815 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 816 } 817 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 818 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 819 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 820 if (n_benign) { 821 IS dummy; 822 PetscInt ii[2]; 823 824 ii[0] = 0; 825 ii[1] = pcbddc->B0_ncol; 826 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 827 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 828 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 829 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 830 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 831 } 832 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 833 } 834 835 /* Matrix of coarse basis functions (local) */ 836 if (pcbddc->coarse_phi_B) { 837 PetscInt on_B,on_primal,on_D=n_D; 838 if (pcbddc->coarse_phi_D) { 839 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 840 } 841 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 842 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 843 PetscScalar *marray; 844 845 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 846 ierr = PetscFree(marray);CHKERRQ(ierr); 847 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 848 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 849 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 850 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 851 } 852 } 853 854 if (!pcbddc->coarse_phi_B) { 855 PetscScalar *marray; 856 857 n = n_B*pcbddc->local_primal_size; 858 if (pcbddc->switch_static || pcbddc->dbg_flag) { 859 n += n_D*pcbddc->local_primal_size; 860 } 861 if (!pcbddc->symmetric_primal) { 862 n *= 2; 863 } 864 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 866 n = n_B*pcbddc->local_primal_size; 867 if (pcbddc->switch_static || pcbddc->dbg_flag) { 868 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 869 n += n_D*pcbddc->local_primal_size; 870 } 871 if (!pcbddc->symmetric_primal) { 872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 873 if (pcbddc->switch_static || pcbddc->dbg_flag) { 874 n = n_B*pcbddc->local_primal_size; 875 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 876 } 877 } else { 878 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 879 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 880 if (pcbddc->switch_static || pcbddc->dbg_flag) { 881 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 882 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 883 } 884 } 885 } 886 887 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 888 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 889 const PetscInt *idxs; 890 891 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 892 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 894 } 895 /* vertices */ 896 if (n_vertices) { 897 898 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 899 900 if (n_R) { 901 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 902 Mat S_V_benign; 903 PetscBLASInt B_N,B_one = 1; 904 PetscScalar *x,*y; 905 PetscBool isseqaij; 906 907 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 908 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 909 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 910 if (F) { /* TODO could be optimized for symmetric problems */ 911 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 912 } else { 913 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 914 for (i=0;i<n_vertices;i++) { 915 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 916 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 917 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 918 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 919 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 920 } 921 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 922 } 923 if (n_benign) { 924 const PetscScalar *vals; 925 const PetscInt *idxs; 926 PetscInt n; 927 928 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr); 929 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign_2);CHKERRQ(ierr); 930 ierr = VecSet(vec_R_benign_2,0.);CHKERRQ(ierr); 931 ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 932 ierr = VecSetValues(vec_R_benign_2,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 933 ierr = VecAssemblyBegin(vec_R_benign_2);CHKERRQ(ierr); 934 ierr = VecAssemblyEnd(vec_R_benign_2);CHKERRQ(ierr); 935 ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 936 ierr = KSPSolve(pcbddc->ksp_R,vec_R_benign_2,vec_R_benign);CHKERRQ(ierr); 937 ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr); 938 } 939 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 940 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 941 if (n_constraints) { 942 Mat B; 943 944 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 945 for (i=0;i<n_vertices;i++) { 946 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 947 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 948 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 949 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 952 } 953 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 954 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 955 ierr = MatDestroy(&B);CHKERRQ(ierr); 956 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 957 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 958 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 959 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 960 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 961 ierr = MatDestroy(&B);CHKERRQ(ierr); 962 } 963 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 964 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 965 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 966 } 967 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 968 if (n_benign) { 969 ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr); 970 } 971 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 972 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 973 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 974 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 975 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 976 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 977 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 978 if (!n_benign) { 979 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 980 } else { 981 const PetscScalar *vals; 982 PetscScalar val; 983 const PetscInt *idxs; 984 PetscInt n,j; 985 986 /* Use explicit loops to populate the local coarse matrix */ 987 ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr); 988 for (j=0;j<n_vertices;j++) { 989 for (i=0;i<n_vertices;i++) { 990 coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i]; 991 } 992 } 993 ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr); 994 ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr); 995 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 996 for (j=0;j<n_vertices;j++) { 997 coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j]; 998 coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j]; 999 } 1000 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1001 ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr); 1002 ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr); 1003 ierr = VecDot(vec_R_benign_2,vec_R_benign,&val);CHKERRQ(ierr); 1004 coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = val; 1005 } 1006 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1007 } else { 1008 /* TODO benign case also here */ 1009 if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented"); 1010 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1011 } 1012 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1013 /* coarse basis functions */ 1014 for (i=0;i<n_vertices;i++) { 1015 PetscScalar *y; 1016 1017 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1018 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1019 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1020 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1021 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1022 y[n_B*i+idx_V_B[i]] = 1.0; 1023 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1024 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1025 1026 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1027 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1028 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1029 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1032 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1033 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1034 } 1035 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1036 } 1037 if (n_benign) { 1038 PetscScalar *y; 1039 1040 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1041 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr); 1042 ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043 ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1044 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1045 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1046 1047 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1048 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1049 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*n_vertices);CHKERRQ(ierr); 1050 ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1051 ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1053 y[n_D*n_vertices+p0_lidx_I] = 0.0; 1054 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1055 } 1056 } 1057 /* if n_R == 0 the object is not destroyed */ 1058 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1059 } 1060 1061 if (n_constraints) { 1062 Mat B; 1063 1064 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1065 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1066 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1067 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1068 if (n_vertices) { 1069 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1070 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1071 } else { 1072 Mat S_VCt; 1073 1074 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1075 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1077 } 1078 } 1079 ierr = MatDestroy(&B);CHKERRQ(ierr); 1080 /* coarse basis functions */ 1081 for (i=0;i<n_constraints;i++) { 1082 PetscScalar *y; 1083 1084 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1085 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1086 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1087 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1090 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1091 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1092 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1093 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1094 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1095 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1096 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1097 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1098 } 1099 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1100 } 1101 } 1102 if (n_constraints) { 1103 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1104 } 1105 1106 if (n_benign) { 1107 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1108 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1109 ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr); 1110 ierr = VecDestroy(&vec_R_benign_2);CHKERRQ(ierr); 1111 } 1112 1113 /* compute other basis functions for non-symmetric problems */ 1114 if (!pcbddc->symmetric_primal) { 1115 1116 if (n_constraints) { 1117 Mat S_CCT,B_C; 1118 1119 /* this is a lazy thing */ 1120 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1121 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1122 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1123 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1124 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1125 if (n_vertices) { 1126 Mat B_V,S_VCT; 1127 1128 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1129 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1130 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1131 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1132 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1133 } 1134 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1135 } else { /* if there are no constraints, reset work */ 1136 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1137 } 1138 if (n_vertices && n_R) { 1139 Mat A_VRT; 1140 PetscScalar *marray; 1141 PetscBLASInt B_N,B_one = 1; 1142 1143 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1144 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1145 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1146 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1147 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1148 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1149 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1150 } 1151 1152 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1153 for (i=0;i<pcbddc->local_primal_size;i++) { 1154 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1155 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1156 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1157 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1158 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1159 } 1160 } else { 1161 for (i=0;i<pcbddc->local_primal_size;i++) { 1162 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1163 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1164 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1165 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1166 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1167 } 1168 } 1169 /* coarse basis functions */ 1170 for (i=0;i<pcbddc->local_primal_size;i++) { 1171 PetscScalar *y; 1172 1173 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1174 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1175 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1176 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1178 if (i<n_vertices) { 1179 y[n_B*i+idx_V_B[i]] = 1.0; 1180 } 1181 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1182 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1183 1184 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1185 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1186 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1187 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1188 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1189 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1190 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1191 } 1192 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1193 } 1194 } 1195 /* free memory */ 1196 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1197 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1198 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1199 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1200 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1201 ierr = PetscFree(work);CHKERRQ(ierr); 1202 if (n_vertices) { 1203 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1204 } 1205 if (n_constraints) { 1206 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1207 } 1208 /* Checking coarse_sub_mat and coarse basis functios */ 1209 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1210 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1211 if (pcbddc->dbg_flag) { 1212 Mat coarse_sub_mat; 1213 Mat AUXMAT,TM1,TM2,TM3,TM4; 1214 Mat coarse_phi_D,coarse_phi_B; 1215 Mat coarse_psi_D,coarse_psi_B; 1216 Mat A_II,A_BB,A_IB,A_BI; 1217 Mat C_B,CPHI; 1218 IS is_dummy; 1219 Vec mones; 1220 MatType checkmattype=MATSEQAIJ; 1221 PetscReal real_value; 1222 1223 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1224 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1225 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1226 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1227 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1228 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1229 if (unsymmetric_check) { 1230 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1231 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1232 } 1233 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1234 1235 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1236 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1237 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1238 if (unsymmetric_check) { 1239 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1240 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1241 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1242 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1243 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1244 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1245 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1246 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1247 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1248 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1249 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1250 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1251 } else { 1252 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1253 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1254 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1255 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1256 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1257 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1258 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1259 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1260 } 1261 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1262 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1263 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1264 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1265 if (n_benign) { 1266 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1267 PetscScalar *data,*data2; 1268 1269 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1270 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1271 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1272 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1273 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1274 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1275 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1276 for (i=0;i<pcbddc->local_primal_size;i++) { 1277 data[n_vertices*pcbddc->local_primal_size+i] += data2[i]; 1278 data[i*pcbddc->local_primal_size+n_vertices] += data2[i]; 1279 } 1280 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1281 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1282 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1283 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1284 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1285 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1286 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1287 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1288 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1289 } 1290 #if 0 1291 { 1292 PetscViewer viewer; 1293 char filename[256]; 1294 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1295 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1296 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1297 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1298 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1299 } 1300 #endif 1301 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1302 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1303 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1304 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1305 1306 /* check constraints */ 1307 if (!n_benign) { 1308 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1309 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1310 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1311 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1312 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1313 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1314 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1316 if (unsymmetric_check) { 1317 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1318 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1319 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1320 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1321 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1322 } 1323 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1324 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1325 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1326 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1327 } 1328 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1329 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1330 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1331 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1332 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1333 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1334 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1335 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1336 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1337 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1338 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1339 if (unsymmetric_check) { 1340 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1341 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1342 } 1343 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1344 } 1345 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1346 /* get back data */ 1347 *coarse_submat_vals_n = coarse_submat_vals; 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1353 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1354 { 1355 Mat *work_mat; 1356 IS isrow_s,iscol_s; 1357 PetscBool rsorted,csorted; 1358 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1359 PetscErrorCode ierr; 1360 1361 PetscFunctionBegin; 1362 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1363 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1364 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1365 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1366 1367 if (!rsorted) { 1368 const PetscInt *idxs; 1369 PetscInt *idxs_sorted,i; 1370 1371 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1372 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1373 for (i=0;i<rsize;i++) { 1374 idxs_perm_r[i] = i; 1375 } 1376 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1377 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1378 for (i=0;i<rsize;i++) { 1379 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1380 } 1381 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1382 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1383 } else { 1384 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1385 isrow_s = isrow; 1386 } 1387 1388 if (!csorted) { 1389 if (isrow == iscol) { 1390 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1391 iscol_s = isrow_s; 1392 } else { 1393 const PetscInt *idxs; 1394 PetscInt *idxs_sorted,i; 1395 1396 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1397 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1398 for (i=0;i<csize;i++) { 1399 idxs_perm_c[i] = i; 1400 } 1401 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1402 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1403 for (i=0;i<csize;i++) { 1404 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1405 } 1406 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1407 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1408 } 1409 } else { 1410 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1411 iscol_s = iscol; 1412 } 1413 1414 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1415 1416 if (!rsorted || !csorted) { 1417 Mat new_mat; 1418 IS is_perm_r,is_perm_c; 1419 1420 if (!rsorted) { 1421 PetscInt *idxs_r,i; 1422 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1423 for (i=0;i<rsize;i++) { 1424 idxs_r[idxs_perm_r[i]] = i; 1425 } 1426 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1427 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1428 } else { 1429 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1430 } 1431 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1432 1433 if (!csorted) { 1434 if (isrow_s == iscol_s) { 1435 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1436 is_perm_c = is_perm_r; 1437 } else { 1438 PetscInt *idxs_c,i; 1439 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1440 for (i=0;i<csize;i++) { 1441 idxs_c[idxs_perm_c[i]] = i; 1442 } 1443 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1444 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1445 } 1446 } else { 1447 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1448 } 1449 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1450 1451 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1452 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1453 work_mat[0] = new_mat; 1454 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1455 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1456 } 1457 1458 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1459 *B = work_mat[0]; 1460 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1461 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1462 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1468 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1469 { 1470 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1471 PC_IS* pcis = (PC_IS*)pc->data; 1472 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1473 Mat new_mat; 1474 IS is_local,is_global; 1475 PetscInt local_size; 1476 PetscBool isseqaij; 1477 PetscErrorCode ierr; 1478 1479 PetscFunctionBegin; 1480 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1481 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1483 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1484 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1485 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1486 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1487 1488 /* check */ 1489 if (pcbddc->dbg_flag) { 1490 Vec x,x_change; 1491 PetscReal error; 1492 1493 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1494 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1495 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1496 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1499 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1500 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1501 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1502 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1503 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1504 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1505 ierr = VecDestroy(&x);CHKERRQ(ierr); 1506 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1507 } 1508 1509 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1510 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1511 if (isseqaij) { 1512 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1513 } else { 1514 Mat work_mat; 1515 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1516 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1517 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1518 } 1519 if (matis->A->symmetric_set) { 1520 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1521 #if !defined(PETSC_USE_COMPLEX) 1522 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1523 #endif 1524 } 1525 /* 1526 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1527 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1528 */ 1529 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1535 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1536 { 1537 PC_IS* pcis = (PC_IS*)(pc->data); 1538 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1539 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1540 PetscInt *idx_R_local=NULL; 1541 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1542 PetscInt vbs,bs; 1543 PetscBT bitmask=NULL; 1544 PetscErrorCode ierr; 1545 1546 PetscFunctionBegin; 1547 /* 1548 No need to setup local scatters if 1549 - primal space is unchanged 1550 AND 1551 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1552 AND 1553 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1554 */ 1555 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1556 PetscFunctionReturn(0); 1557 } 1558 /* destroy old objects */ 1559 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1560 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1561 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1562 /* Set Non-overlapping dimensions */ 1563 n_B = pcis->n_B; 1564 n_D = pcis->n - n_B; 1565 n_vertices = pcbddc->n_vertices; 1566 1567 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1568 1569 /* create auxiliary bitmask and allocate workspace */ 1570 if (!sub_schurs->reuse_mumps) { 1571 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1572 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1573 for (i=0;i<n_vertices;i++) { 1574 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1575 } 1576 1577 for (i=0, n_R=0; i<pcis->n; i++) { 1578 if (!PetscBTLookup(bitmask,i)) { 1579 idx_R_local[n_R++] = i; 1580 } 1581 } 1582 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1583 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1584 1585 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1586 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1587 } 1588 1589 /* Block code */ 1590 vbs = 1; 1591 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1592 if (bs>1 && !(n_vertices%bs)) { 1593 PetscBool is_blocked = PETSC_TRUE; 1594 PetscInt *vary; 1595 if (!sub_schurs->reuse_mumps) { 1596 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1597 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1598 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1599 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1600 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1601 for (i=0; i<pcis->n/bs; i++) { 1602 if (vary[i]!=0 && vary[i]!=bs) { 1603 is_blocked = PETSC_FALSE; 1604 break; 1605 } 1606 } 1607 ierr = PetscFree(vary);CHKERRQ(ierr); 1608 } else { 1609 /* Verify directly the R set */ 1610 for (i=0; i<n_R/bs; i++) { 1611 PetscInt j,node=idx_R_local[bs*i]; 1612 for (j=1; j<bs; j++) { 1613 if (node != idx_R_local[bs*i+j]-j) { 1614 is_blocked = PETSC_FALSE; 1615 break; 1616 } 1617 } 1618 } 1619 } 1620 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1621 vbs = bs; 1622 for (i=0;i<n_R/vbs;i++) { 1623 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1624 } 1625 } 1626 } 1627 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1628 if (sub_schurs->reuse_mumps) { 1629 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1630 1631 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1632 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1633 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1634 reuse_mumps->is_R = pcbddc->is_R_local; 1635 } else { 1636 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1637 } 1638 1639 /* print some info if requested */ 1640 if (pcbddc->dbg_flag) { 1641 PetscInt benign = 0; 1642 1643 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1644 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1645 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1646 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1649 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1650 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1651 } 1652 1653 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1654 if (!sub_schurs->reuse_mumps) { 1655 IS is_aux1,is_aux2; 1656 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1657 1658 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1659 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1660 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1661 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1662 for (i=0; i<n_D; i++) { 1663 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1664 } 1665 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1666 for (i=0, j=0; i<n_R; i++) { 1667 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1668 aux_array1[j++] = i; 1669 } 1670 } 1671 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1672 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1673 for (i=0, j=0; i<n_B; i++) { 1674 if (!PetscBTLookup(bitmask,is_indices[i])) { 1675 aux_array2[j++] = i; 1676 } 1677 } 1678 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1679 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1680 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1681 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1682 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1683 1684 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1685 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1686 for (i=0, j=0; i<n_R; i++) { 1687 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1688 aux_array1[j++] = i; 1689 } 1690 } 1691 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1692 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1693 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1694 } 1695 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1696 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1697 } else { 1698 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1699 IS tis; 1700 PetscInt schur_size; 1701 1702 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1703 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1704 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1705 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1707 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1708 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1709 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1710 } 1711 } 1712 PetscFunctionReturn(0); 1713 } 1714 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1718 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1719 { 1720 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1721 PC_IS *pcis = (PC_IS*)pc->data; 1722 PC pc_temp; 1723 Mat A_RR; 1724 MatReuse reuse; 1725 PetscScalar m_one = -1.0; 1726 PetscReal value; 1727 PetscInt n_D,n_R; 1728 PetscBool use_exact,use_exact_reduced,issbaij; 1729 PetscErrorCode ierr; 1730 /* prefixes stuff */ 1731 char dir_prefix[256],neu_prefix[256],str_level[16]; 1732 size_t len; 1733 1734 PetscFunctionBegin; 1735 1736 /* compute prefixes */ 1737 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1738 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1739 if (!pcbddc->current_level) { 1740 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1741 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1742 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1743 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1744 } else { 1745 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1746 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1747 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1748 len -= 15; /* remove "pc_bddc_coarse_" */ 1749 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1750 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1751 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1752 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1753 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1754 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1755 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1756 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1757 } 1758 1759 /* DIRICHLET PROBLEM */ 1760 if (dirichlet) { 1761 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1762 if (pcbddc->local_mat->symmetric_set) { 1763 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1764 } 1765 /* Matrix for Dirichlet problem is pcis->A_II */ 1766 n_D = pcis->n - pcis->n_B; 1767 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1768 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1769 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1770 /* default */ 1771 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1772 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1773 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1774 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1775 if (issbaij) { 1776 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1777 } else { 1778 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1779 } 1780 /* Allow user's customization */ 1781 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1782 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1783 } 1784 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1785 if (sub_schurs->reuse_mumps) { 1786 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1787 1788 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1789 } 1790 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1791 if (!n_D) { 1792 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1793 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1794 } 1795 /* Set Up KSP for Dirichlet problem of BDDC */ 1796 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1797 /* set ksp_D into pcis data */ 1798 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1799 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1800 pcis->ksp_D = pcbddc->ksp_D; 1801 } 1802 1803 /* NEUMANN PROBLEM */ 1804 A_RR = 0; 1805 if (neumann) { 1806 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1807 PetscInt ibs,mbs; 1808 PetscBool issbaij; 1809 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1810 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1811 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1812 if (pcbddc->ksp_R) { /* already created ksp */ 1813 PetscInt nn_R; 1814 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1815 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1816 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1817 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1818 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1819 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1820 reuse = MAT_INITIAL_MATRIX; 1821 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1822 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1823 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1824 reuse = MAT_INITIAL_MATRIX; 1825 } else { /* safe to reuse the matrix */ 1826 reuse = MAT_REUSE_MATRIX; 1827 } 1828 } 1829 /* last check */ 1830 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1831 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1832 reuse = MAT_INITIAL_MATRIX; 1833 } 1834 } else { /* first time, so we need to create the matrix */ 1835 reuse = MAT_INITIAL_MATRIX; 1836 } 1837 /* extract A_RR */ 1838 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1839 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1840 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1841 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1842 if (matis->A == pcbddc->local_mat) { 1843 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1844 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1845 } else { 1846 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1847 } 1848 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1849 if (matis->A == pcbddc->local_mat) { 1850 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1851 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1852 } else { 1853 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1854 } 1855 } 1856 if (!sub_schurs->reuse_mumps) { 1857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1858 if (pcbddc->local_mat->symmetric_set) { 1859 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1860 } 1861 } else { 1862 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1863 1864 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1865 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1866 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1867 } 1868 if (!pcbddc->ksp_R) { /* create object if not present */ 1869 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1870 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1871 /* default */ 1872 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1873 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1874 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1875 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1876 if (issbaij) { 1877 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1878 } else { 1879 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1880 } 1881 /* Allow user's customization */ 1882 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1883 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1884 } 1885 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1886 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1887 if (!n_R) { 1888 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1889 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1890 } 1891 /* Reuse MUMPS solver if it is present */ 1892 if (sub_schurs->reuse_mumps) { 1893 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1894 1895 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1896 } 1897 /* Set Up KSP for Neumann problem of BDDC */ 1898 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1899 } 1900 /* free Neumann problem's matrix */ 1901 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1902 1903 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1904 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1905 if (pcbddc->dbg_flag) { 1906 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1907 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1908 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1909 } 1910 if (dirichlet) { /* Dirichlet */ 1911 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1912 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1913 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1914 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1915 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1916 /* need to be adapted? */ 1917 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1918 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1919 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1920 /* print info */ 1921 if (pcbddc->dbg_flag) { 1922 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1923 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1924 } 1925 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1926 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1927 } 1928 } 1929 if (neumann) { /* Neumann */ 1930 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1931 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1932 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1933 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1934 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1935 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1936 /* need to be adapted? */ 1937 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1938 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1939 /* print info */ 1940 if (pcbddc->dbg_flag) { 1941 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1942 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1943 } 1944 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1945 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1946 } 1947 } 1948 } 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1954 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1955 { 1956 PetscErrorCode ierr; 1957 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1958 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1959 1960 PetscFunctionBegin; 1961 if (!sub_schurs->reuse_mumps) { 1962 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1963 } 1964 if (!pcbddc->switch_static) { 1965 if (applytranspose && pcbddc->local_auxmat1) { 1966 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1967 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1968 } 1969 if (!sub_schurs->reuse_mumps) { 1970 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1971 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1972 } else { 1973 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1974 1975 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1976 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1977 } 1978 } else { 1979 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1980 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1981 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1982 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1983 if (applytranspose && pcbddc->local_auxmat1) { 1984 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1985 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1986 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1987 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1988 } 1989 } 1990 if (!sub_schurs->reuse_mumps) { 1991 if (applytranspose) { 1992 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1993 } else { 1994 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1995 } 1996 #if defined(PETSC_HAVE_MUMPS) 1997 } else { 1998 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1999 2000 if (applytranspose) { 2001 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2002 } else { 2003 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2004 } 2005 #endif 2006 } 2007 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2008 if (!pcbddc->switch_static) { 2009 if (!sub_schurs->reuse_mumps) { 2010 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2011 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2012 } else { 2013 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2014 2015 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2016 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2017 } 2018 if (!applytranspose && pcbddc->local_auxmat1) { 2019 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2020 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2021 } 2022 } else { 2023 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2024 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2026 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2027 if (!applytranspose && pcbddc->local_auxmat1) { 2028 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2029 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2030 } 2031 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2032 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2033 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2034 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2035 } 2036 PetscFunctionReturn(0); 2037 } 2038 2039 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2040 #undef __FUNCT__ 2041 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2042 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2043 { 2044 PetscErrorCode ierr; 2045 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2046 PC_IS* pcis = (PC_IS*) (pc->data); 2047 const PetscScalar zero = 0.0; 2048 2049 PetscFunctionBegin; 2050 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2051 if (applytranspose) { 2052 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2053 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2054 } else { 2055 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2056 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2057 } 2058 2059 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2060 if (pcbddc->benign_p0_lidx >= 0) { 2061 PetscScalar *array; 2062 2063 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2064 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2065 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2066 } 2067 2068 /* start communications from local primal nodes to rhs of coarse solver */ 2069 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2070 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2071 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 2073 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2074 /* TODO remove null space when doing multilevel */ 2075 if (pcbddc->coarse_ksp) { 2076 Vec rhs,sol; 2077 2078 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2079 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2080 if (applytranspose) { 2081 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2082 } else { 2083 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2084 } 2085 } 2086 2087 /* Local solution on R nodes */ 2088 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2089 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2090 } 2091 2092 /* communications from coarse sol to local primal nodes */ 2093 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2094 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2095 2096 /* Sum contributions from two levels */ 2097 if (applytranspose) { 2098 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2099 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2100 } else { 2101 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2102 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2103 } 2104 /* store p0 */ 2105 if (pcbddc->benign_p0_lidx >= 0) { 2106 PetscScalar *array; 2107 2108 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2109 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2110 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2111 } 2112 PetscFunctionReturn(0); 2113 } 2114 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2117 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2118 { 2119 PetscErrorCode ierr; 2120 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2121 PetscScalar *array; 2122 Vec from,to; 2123 2124 PetscFunctionBegin; 2125 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2126 from = pcbddc->coarse_vec; 2127 to = pcbddc->vec1_P; 2128 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2129 Vec tvec; 2130 2131 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2132 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2133 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2134 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2135 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2136 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2137 } 2138 } else { /* from local to global -> put data in coarse right hand side */ 2139 from = pcbddc->vec1_P; 2140 to = pcbddc->coarse_vec; 2141 } 2142 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 #undef __FUNCT__ 2147 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2148 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2149 { 2150 PetscErrorCode ierr; 2151 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2152 PetscScalar *array; 2153 Vec from,to; 2154 2155 PetscFunctionBegin; 2156 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2157 from = pcbddc->coarse_vec; 2158 to = pcbddc->vec1_P; 2159 } else { /* from local to global -> put data in coarse right hand side */ 2160 from = pcbddc->vec1_P; 2161 to = pcbddc->coarse_vec; 2162 } 2163 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2164 if (smode == SCATTER_FORWARD) { 2165 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2166 Vec tvec; 2167 2168 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2169 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2170 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2171 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2172 } 2173 } else { 2174 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2175 ierr = VecResetArray(from);CHKERRQ(ierr); 2176 } 2177 } 2178 PetscFunctionReturn(0); 2179 } 2180 2181 /* uncomment for testing purposes */ 2182 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2183 #undef __FUNCT__ 2184 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2185 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2186 { 2187 PetscErrorCode ierr; 2188 PC_IS* pcis = (PC_IS*)(pc->data); 2189 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2190 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2191 /* one and zero */ 2192 PetscScalar one=1.0,zero=0.0; 2193 /* space to store constraints and their local indices */ 2194 PetscScalar *constraints_data; 2195 PetscInt *constraints_idxs,*constraints_idxs_B; 2196 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2197 PetscInt *constraints_n; 2198 /* iterators */ 2199 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2200 /* BLAS integers */ 2201 PetscBLASInt lwork,lierr; 2202 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2203 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2204 /* reuse */ 2205 PetscInt olocal_primal_size,olocal_primal_size_cc; 2206 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2207 /* change of basis */ 2208 PetscBool qr_needed; 2209 PetscBT change_basis,qr_needed_idx; 2210 /* auxiliary stuff */ 2211 PetscInt *nnz,*is_indices; 2212 PetscInt ncc; 2213 /* some quantities */ 2214 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2215 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2216 2217 PetscFunctionBegin; 2218 /* Destroy Mat objects computed previously */ 2219 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2220 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2221 /* save info on constraints from previous setup (if any) */ 2222 olocal_primal_size = pcbddc->local_primal_size; 2223 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2224 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2225 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2226 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2227 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2228 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2229 2230 /* print some info */ 2231 if (pcbddc->dbg_flag) { 2232 IS vertices; 2233 PetscInt nv,nedges,nfaces; 2234 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2235 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2236 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2237 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2240 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2241 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2242 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2243 } 2244 2245 if (!pcbddc->adaptive_selection) { 2246 IS ISForVertices,*ISForFaces,*ISForEdges; 2247 MatNullSpace nearnullsp; 2248 const Vec *nearnullvecs; 2249 Vec *localnearnullsp; 2250 PetscScalar *array; 2251 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2252 PetscBool nnsp_has_cnst; 2253 /* LAPACK working arrays for SVD or POD */ 2254 PetscBool skip_lapack,boolforchange; 2255 PetscScalar *work; 2256 PetscReal *singular_vals; 2257 #if defined(PETSC_USE_COMPLEX) 2258 PetscReal *rwork; 2259 #endif 2260 #if defined(PETSC_MISSING_LAPACK_GESVD) 2261 PetscScalar *temp_basis,*correlation_mat; 2262 #else 2263 PetscBLASInt dummy_int=1; 2264 PetscScalar dummy_scalar=1.; 2265 #endif 2266 2267 /* Get index sets for faces, edges and vertices from graph */ 2268 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2269 /* free unneeded index sets */ 2270 if (!pcbddc->use_vertices) { 2271 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2272 } 2273 if (!pcbddc->use_edges) { 2274 for (i=0;i<n_ISForEdges;i++) { 2275 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2276 } 2277 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2278 n_ISForEdges = 0; 2279 } 2280 if (!pcbddc->use_faces) { 2281 for (i=0;i<n_ISForFaces;i++) { 2282 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2283 } 2284 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2285 n_ISForFaces = 0; 2286 } 2287 2288 #if defined(PETSC_USE_DEBUG) 2289 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2290 Also use_change_of_basis should be consistent among processors */ 2291 if (pcbddc->NullSpace) { 2292 PetscBool tbool[2],gbool[2]; 2293 2294 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2295 pcbddc->use_change_of_basis = PETSC_TRUE; 2296 if (!ISForEdges) { 2297 pcbddc->use_change_on_faces = PETSC_TRUE; 2298 } 2299 } 2300 tbool[0] = pcbddc->use_change_of_basis; 2301 tbool[1] = pcbddc->use_change_on_faces; 2302 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2303 pcbddc->use_change_of_basis = gbool[0]; 2304 pcbddc->use_change_on_faces = gbool[1]; 2305 } 2306 #endif 2307 2308 /* check if near null space is attached to global mat */ 2309 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2310 if (nearnullsp) { 2311 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2312 /* remove any stored info */ 2313 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2314 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2315 /* store information for BDDC solver reuse */ 2316 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2317 pcbddc->onearnullspace = nearnullsp; 2318 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2319 for (i=0;i<nnsp_size;i++) { 2320 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2321 } 2322 } else { /* if near null space is not provided BDDC uses constants by default */ 2323 nnsp_size = 0; 2324 nnsp_has_cnst = PETSC_TRUE; 2325 } 2326 /* get max number of constraints on a single cc */ 2327 max_constraints = nnsp_size; 2328 if (nnsp_has_cnst) max_constraints++; 2329 2330 /* 2331 Evaluate maximum storage size needed by the procedure 2332 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2333 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2334 There can be multiple constraints per connected component 2335 */ 2336 n_vertices = 0; 2337 if (ISForVertices) { 2338 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2339 } 2340 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2341 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2342 2343 total_counts = n_ISForFaces+n_ISForEdges; 2344 total_counts *= max_constraints; 2345 total_counts += n_vertices; 2346 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2347 2348 total_counts = 0; 2349 max_size_of_constraint = 0; 2350 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2351 IS used_is; 2352 if (i<n_ISForEdges) { 2353 used_is = ISForEdges[i]; 2354 } else { 2355 used_is = ISForFaces[i-n_ISForEdges]; 2356 } 2357 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2358 total_counts += j; 2359 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2360 } 2361 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2362 2363 /* get local part of global near null space vectors */ 2364 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2365 for (k=0;k<nnsp_size;k++) { 2366 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2367 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2368 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2369 } 2370 2371 /* whether or not to skip lapack calls */ 2372 skip_lapack = PETSC_TRUE; 2373 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2374 2375 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2376 if (!skip_lapack) { 2377 PetscScalar temp_work; 2378 2379 #if defined(PETSC_MISSING_LAPACK_GESVD) 2380 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2381 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2382 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2383 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2384 #if defined(PETSC_USE_COMPLEX) 2385 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2386 #endif 2387 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2388 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2389 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2390 lwork = -1; 2391 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2392 #if !defined(PETSC_USE_COMPLEX) 2393 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2394 #else 2395 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2396 #endif 2397 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2398 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2399 #else /* on missing GESVD */ 2400 /* SVD */ 2401 PetscInt max_n,min_n; 2402 max_n = max_size_of_constraint; 2403 min_n = max_constraints; 2404 if (max_size_of_constraint < max_constraints) { 2405 min_n = max_size_of_constraint; 2406 max_n = max_constraints; 2407 } 2408 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2409 #if defined(PETSC_USE_COMPLEX) 2410 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2411 #endif 2412 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2413 lwork = -1; 2414 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2415 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2416 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2417 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2418 #if !defined(PETSC_USE_COMPLEX) 2419 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2420 #else 2421 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2422 #endif 2423 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2424 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2425 #endif /* on missing GESVD */ 2426 /* Allocate optimal workspace */ 2427 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2428 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2429 } 2430 /* Now we can loop on constraining sets */ 2431 total_counts = 0; 2432 constraints_idxs_ptr[0] = 0; 2433 constraints_data_ptr[0] = 0; 2434 /* vertices */ 2435 if (n_vertices) { 2436 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2437 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2438 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2439 for (i=0;i<n_vertices;i++) { 2440 constraints_n[total_counts] = 1; 2441 constraints_data[total_counts] = 1.0; 2442 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2443 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2444 total_counts++; 2445 } 2446 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2447 PetscBool used_vertex; 2448 for (i=0;i<n_vertices;i++) { 2449 used_vertex = PETSC_FALSE; 2450 k = 0; 2451 while (!used_vertex && k<nnsp_size) { 2452 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2453 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2454 constraints_n[total_counts] = 1; 2455 constraints_idxs[total_counts] = is_indices[i]; 2456 constraints_data[total_counts] = 1.0; 2457 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2458 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2459 total_counts++; 2460 used_vertex = PETSC_TRUE; 2461 } 2462 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2463 k++; 2464 } 2465 } 2466 } 2467 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2468 n_vertices = total_counts; 2469 } 2470 2471 /* edges and faces */ 2472 total_counts_cc = total_counts; 2473 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2474 IS used_is; 2475 PetscBool idxs_copied = PETSC_FALSE; 2476 2477 if (ncc<n_ISForEdges) { 2478 used_is = ISForEdges[ncc]; 2479 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2480 } else { 2481 used_is = ISForFaces[ncc-n_ISForEdges]; 2482 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2483 } 2484 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2485 2486 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2487 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2488 /* change of basis should not be performed on local periodic nodes */ 2489 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2490 if (nnsp_has_cnst) { 2491 PetscScalar quad_value; 2492 2493 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2494 idxs_copied = PETSC_TRUE; 2495 2496 if (!pcbddc->use_nnsp_true) { 2497 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2498 } else { 2499 quad_value = 1.0; 2500 } 2501 for (j=0;j<size_of_constraint;j++) { 2502 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2503 } 2504 temp_constraints++; 2505 total_counts++; 2506 } 2507 for (k=0;k<nnsp_size;k++) { 2508 PetscReal real_value; 2509 PetscScalar *ptr_to_data; 2510 2511 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2512 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2513 for (j=0;j<size_of_constraint;j++) { 2514 ptr_to_data[j] = array[is_indices[j]]; 2515 } 2516 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2517 /* check if array is null on the connected component */ 2518 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2519 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2520 if (real_value > 0.0) { /* keep indices and values */ 2521 temp_constraints++; 2522 total_counts++; 2523 if (!idxs_copied) { 2524 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2525 idxs_copied = PETSC_TRUE; 2526 } 2527 } 2528 } 2529 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2530 valid_constraints = temp_constraints; 2531 if (!pcbddc->use_nnsp_true && temp_constraints) { 2532 if (temp_constraints == 1) { /* just normalize the constraint */ 2533 PetscScalar norm,*ptr_to_data; 2534 2535 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2537 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2538 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2539 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2540 } else { /* perform SVD */ 2541 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2542 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2543 2544 #if defined(PETSC_MISSING_LAPACK_GESVD) 2545 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2546 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2547 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2548 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2549 from that computed using LAPACKgesvd 2550 -> This is due to a different computation of eigenvectors in LAPACKheev 2551 -> The quality of the POD-computed basis will be the same */ 2552 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2553 /* Store upper triangular part of correlation matrix */ 2554 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2555 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2556 for (j=0;j<temp_constraints;j++) { 2557 for (k=0;k<j+1;k++) { 2558 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2559 } 2560 } 2561 /* compute eigenvalues and eigenvectors of correlation matrix */ 2562 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2563 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2564 #if !defined(PETSC_USE_COMPLEX) 2565 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2566 #else 2567 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2568 #endif 2569 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2570 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2571 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2572 j = 0; 2573 while (j < temp_constraints && singular_vals[j] < tol) j++; 2574 total_counts = total_counts-j; 2575 valid_constraints = temp_constraints-j; 2576 /* scale and copy POD basis into used quadrature memory */ 2577 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2578 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2579 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2580 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2581 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2582 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2583 if (j<temp_constraints) { 2584 PetscInt ii; 2585 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2586 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2587 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2588 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2589 for (k=0;k<temp_constraints-j;k++) { 2590 for (ii=0;ii<size_of_constraint;ii++) { 2591 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2592 } 2593 } 2594 } 2595 #else /* on missing GESVD */ 2596 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2597 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2598 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2599 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2600 #if !defined(PETSC_USE_COMPLEX) 2601 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2602 #else 2603 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2604 #endif 2605 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2606 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2607 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2608 k = temp_constraints; 2609 if (k > size_of_constraint) k = size_of_constraint; 2610 j = 0; 2611 while (j < k && singular_vals[k-j-1] < tol) j++; 2612 valid_constraints = k-j; 2613 total_counts = total_counts-temp_constraints+valid_constraints; 2614 #endif /* on missing GESVD */ 2615 } 2616 } 2617 /* update pointers information */ 2618 if (valid_constraints) { 2619 constraints_n[total_counts_cc] = valid_constraints; 2620 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2621 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2622 /* set change_of_basis flag */ 2623 if (boolforchange) { 2624 PetscBTSet(change_basis,total_counts_cc); 2625 } 2626 total_counts_cc++; 2627 } 2628 } 2629 /* free workspace */ 2630 if (!skip_lapack) { 2631 ierr = PetscFree(work);CHKERRQ(ierr); 2632 #if defined(PETSC_USE_COMPLEX) 2633 ierr = PetscFree(rwork);CHKERRQ(ierr); 2634 #endif 2635 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2636 #if defined(PETSC_MISSING_LAPACK_GESVD) 2637 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2638 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2639 #endif 2640 } 2641 for (k=0;k<nnsp_size;k++) { 2642 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2643 } 2644 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2645 /* free index sets of faces, edges and vertices */ 2646 for (i=0;i<n_ISForFaces;i++) { 2647 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2648 } 2649 if (n_ISForFaces) { 2650 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2651 } 2652 for (i=0;i<n_ISForEdges;i++) { 2653 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2654 } 2655 if (n_ISForEdges) { 2656 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2657 } 2658 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2659 } else { 2660 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2661 2662 total_counts = 0; 2663 n_vertices = 0; 2664 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2665 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2666 } 2667 max_constraints = 0; 2668 total_counts_cc = 0; 2669 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2670 total_counts += pcbddc->adaptive_constraints_n[i]; 2671 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2672 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2673 } 2674 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2675 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2676 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2677 constraints_data = pcbddc->adaptive_constraints_data; 2678 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2679 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2680 total_counts_cc = 0; 2681 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2682 if (pcbddc->adaptive_constraints_n[i]) { 2683 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2684 } 2685 } 2686 #if 0 2687 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2688 for (i=0;i<total_counts_cc;i++) { 2689 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2690 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2691 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2692 printf(" %d",constraints_idxs[j]); 2693 } 2694 printf("\n"); 2695 printf("number of cc: %d\n",constraints_n[i]); 2696 } 2697 for (i=0;i<n_vertices;i++) { 2698 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2699 } 2700 for (i=0;i<sub_schurs->n_subs;i++) { 2701 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2702 } 2703 #endif 2704 2705 max_size_of_constraint = 0; 2706 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2707 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2708 /* Change of basis */ 2709 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2710 if (pcbddc->use_change_of_basis) { 2711 for (i=0;i<sub_schurs->n_subs;i++) { 2712 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2713 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2714 } 2715 } 2716 } 2717 } 2718 pcbddc->local_primal_size = total_counts; 2719 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2720 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2721 2722 /* map constraints_idxs in boundary numbering */ 2723 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2724 if (i != constraints_idxs_ptr[total_counts_cc]) { 2725 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2726 } 2727 2728 /* Create constraint matrix */ 2729 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2730 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2731 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2732 2733 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2734 /* determine if a QR strategy is needed for change of basis */ 2735 qr_needed = PETSC_FALSE; 2736 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2737 total_primal_vertices=0; 2738 pcbddc->local_primal_size_cc = 0; 2739 for (i=0;i<total_counts_cc;i++) { 2740 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2741 if (size_of_constraint == 1) { 2742 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2743 pcbddc->local_primal_size_cc += 1; 2744 } else if (PetscBTLookup(change_basis,i)) { 2745 for (k=0;k<constraints_n[i];k++) { 2746 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2747 } 2748 pcbddc->local_primal_size_cc += constraints_n[i]; 2749 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2750 PetscBTSet(qr_needed_idx,i); 2751 qr_needed = PETSC_TRUE; 2752 } 2753 } else { 2754 pcbddc->local_primal_size_cc += 1; 2755 } 2756 } 2757 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2758 pcbddc->n_vertices = total_primal_vertices; 2759 /* permute indices in order to have a sorted set of vertices */ 2760 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2761 2762 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2763 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2764 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2765 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2766 2767 /* nonzero structure of constraint matrix */ 2768 /* and get reference dof for local constraints */ 2769 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2770 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2771 2772 j = total_primal_vertices; 2773 total_counts = total_primal_vertices; 2774 cum = total_primal_vertices; 2775 for (i=n_vertices;i<total_counts_cc;i++) { 2776 if (!PetscBTLookup(change_basis,i)) { 2777 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2778 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2779 cum++; 2780 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2781 for (k=0;k<constraints_n[i];k++) { 2782 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2783 nnz[j+k] = size_of_constraint; 2784 } 2785 j += constraints_n[i]; 2786 } 2787 } 2788 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2789 ierr = PetscFree(nnz);CHKERRQ(ierr); 2790 2791 /* set values in constraint matrix */ 2792 for (i=0;i<total_primal_vertices;i++) { 2793 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2794 } 2795 total_counts = total_primal_vertices; 2796 for (i=n_vertices;i<total_counts_cc;i++) { 2797 if (!PetscBTLookup(change_basis,i)) { 2798 PetscInt *cols; 2799 2800 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2801 cols = constraints_idxs+constraints_idxs_ptr[i]; 2802 for (k=0;k<constraints_n[i];k++) { 2803 PetscInt row = total_counts+k; 2804 PetscScalar *vals; 2805 2806 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2807 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2808 } 2809 total_counts += constraints_n[i]; 2810 } 2811 } 2812 /* assembling */ 2813 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2814 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2815 2816 /* 2817 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2818 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2819 */ 2820 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2821 if (pcbddc->use_change_of_basis) { 2822 /* dual and primal dofs on a single cc */ 2823 PetscInt dual_dofs,primal_dofs; 2824 /* working stuff for GEQRF */ 2825 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2826 PetscBLASInt lqr_work; 2827 /* working stuff for UNGQR */ 2828 PetscScalar *gqr_work,lgqr_work_t; 2829 PetscBLASInt lgqr_work; 2830 /* working stuff for TRTRS */ 2831 PetscScalar *trs_rhs; 2832 PetscBLASInt Blas_NRHS; 2833 /* pointers for values insertion into change of basis matrix */ 2834 PetscInt *start_rows,*start_cols; 2835 PetscScalar *start_vals; 2836 /* working stuff for values insertion */ 2837 PetscBT is_primal; 2838 PetscInt *aux_primal_numbering_B; 2839 /* matrix sizes */ 2840 PetscInt global_size,local_size; 2841 /* temporary change of basis */ 2842 Mat localChangeOfBasisMatrix; 2843 /* extra space for debugging */ 2844 PetscScalar *dbg_work; 2845 2846 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2847 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2848 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2849 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2850 /* nonzeros for local mat */ 2851 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2852 for (i=0;i<pcis->n;i++) nnz[i]=1; 2853 for (i=n_vertices;i<total_counts_cc;i++) { 2854 if (PetscBTLookup(change_basis,i)) { 2855 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2856 if (PetscBTLookup(qr_needed_idx,i)) { 2857 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2858 } else { 2859 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2860 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2861 } 2862 } 2863 } 2864 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2865 ierr = PetscFree(nnz);CHKERRQ(ierr); 2866 /* Set initial identity in the matrix */ 2867 for (i=0;i<pcis->n;i++) { 2868 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2869 } 2870 2871 if (pcbddc->dbg_flag) { 2872 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2873 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2874 } 2875 2876 2877 /* Now we loop on the constraints which need a change of basis */ 2878 /* 2879 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2880 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2881 2882 Basic blocks of change of basis matrix T computed by 2883 2884 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2885 2886 | 1 0 ... 0 s_1/S | 2887 | 0 1 ... 0 s_2/S | 2888 | ... | 2889 | 0 ... 1 s_{n-1}/S | 2890 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2891 2892 with S = \sum_{i=1}^n s_i^2 2893 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2894 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2895 2896 - QR decomposition of constraints otherwise 2897 */ 2898 if (qr_needed) { 2899 /* space to store Q */ 2900 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2901 /* first we issue queries for optimal work */ 2902 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2903 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2904 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2905 lqr_work = -1; 2906 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2907 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2908 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2909 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2910 lgqr_work = -1; 2911 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2912 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2913 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2914 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2915 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2916 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2917 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2918 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2919 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2920 /* array to store scaling factors for reflectors */ 2921 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2922 /* array to store rhs and solution of triangular solver */ 2923 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2924 /* allocating workspace for check */ 2925 if (pcbddc->dbg_flag) { 2926 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2927 } 2928 } 2929 /* array to store whether a node is primal or not */ 2930 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2931 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2932 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2933 if (i != total_primal_vertices) { 2934 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2935 } 2936 for (i=0;i<total_primal_vertices;i++) { 2937 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2938 } 2939 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2940 2941 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2942 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2943 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2944 if (PetscBTLookup(change_basis,total_counts)) { 2945 /* get constraint info */ 2946 primal_dofs = constraints_n[total_counts]; 2947 dual_dofs = size_of_constraint-primal_dofs; 2948 2949 if (pcbddc->dbg_flag) { 2950 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2951 } 2952 2953 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2954 2955 /* copy quadrature constraints for change of basis check */ 2956 if (pcbddc->dbg_flag) { 2957 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2958 } 2959 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2960 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2961 2962 /* compute QR decomposition of constraints */ 2963 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2964 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2965 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2966 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2967 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2968 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2969 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2970 2971 /* explictly compute R^-T */ 2972 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2973 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2974 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2975 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2977 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2978 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2979 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2980 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2981 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2982 2983 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2984 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2985 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2986 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2987 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2988 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2989 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2990 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2991 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2992 2993 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2994 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2995 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2996 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2997 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2998 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2999 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3000 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3001 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3002 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3003 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3004 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3005 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3006 3007 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3008 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3009 /* insert cols for primal dofs */ 3010 for (j=0;j<primal_dofs;j++) { 3011 start_vals = &qr_basis[j*size_of_constraint]; 3012 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3013 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3014 } 3015 /* insert cols for dual dofs */ 3016 for (j=0,k=0;j<dual_dofs;k++) { 3017 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3018 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3019 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3020 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3021 j++; 3022 } 3023 } 3024 3025 /* check change of basis */ 3026 if (pcbddc->dbg_flag) { 3027 PetscInt ii,jj; 3028 PetscBool valid_qr=PETSC_TRUE; 3029 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3030 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3031 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3032 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3033 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3034 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3035 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3036 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3037 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3038 for (jj=0;jj<size_of_constraint;jj++) { 3039 for (ii=0;ii<primal_dofs;ii++) { 3040 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3041 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3042 } 3043 } 3044 if (!valid_qr) { 3045 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3046 for (jj=0;jj<size_of_constraint;jj++) { 3047 for (ii=0;ii<primal_dofs;ii++) { 3048 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3049 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3050 } 3051 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3052 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3053 } 3054 } 3055 } 3056 } else { 3057 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3058 } 3059 } 3060 } else { /* simple transformation block */ 3061 PetscInt row,col; 3062 PetscScalar val,norm; 3063 3064 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3065 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3066 for (j=0;j<size_of_constraint;j++) { 3067 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3068 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3069 if (!PetscBTLookup(is_primal,row_B)) { 3070 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3071 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3072 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3073 } else { 3074 for (k=0;k<size_of_constraint;k++) { 3075 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3076 if (row != col) { 3077 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3078 } else { 3079 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3080 } 3081 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3082 } 3083 } 3084 } 3085 if (pcbddc->dbg_flag) { 3086 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3087 } 3088 } 3089 } else { 3090 if (pcbddc->dbg_flag) { 3091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3092 } 3093 } 3094 } 3095 3096 /* free workspace */ 3097 if (qr_needed) { 3098 if (pcbddc->dbg_flag) { 3099 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3100 } 3101 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3102 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3103 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3104 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3105 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3106 } 3107 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3108 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3110 3111 /* assembling of global change of variable */ 3112 { 3113 Mat tmat; 3114 PetscInt bs; 3115 3116 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3117 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3118 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3119 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3120 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3121 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3122 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3123 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3124 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3125 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3126 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3127 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3128 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3129 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3130 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3131 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3132 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3133 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3134 } 3135 /* check */ 3136 if (pcbddc->dbg_flag) { 3137 PetscReal error; 3138 Vec x,x_change; 3139 3140 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3141 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3142 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3143 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3144 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3145 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3146 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3147 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3148 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3149 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3150 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3151 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3154 ierr = VecDestroy(&x);CHKERRQ(ierr); 3155 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3156 } 3157 3158 /* adapt sub_schurs computed (if any) */ 3159 if (pcbddc->use_deluxe_scaling) { 3160 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3161 if (sub_schurs->S_Ej_all) { 3162 Mat S_new,tmat; 3163 IS is_all_N; 3164 3165 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3166 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3167 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3168 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3169 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3170 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3171 sub_schurs->S_Ej_all = S_new; 3172 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3173 if (sub_schurs->sum_S_Ej_all) { 3174 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3175 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3176 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3177 sub_schurs->sum_S_Ej_all = S_new; 3178 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3179 } 3180 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3181 } 3182 } 3183 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3184 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3185 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3186 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3187 } 3188 3189 /* set up change of basis context */ 3190 if (pcbddc->ChangeOfBasisMatrix) { 3191 PCBDDCChange_ctx change_ctx; 3192 3193 if (!pcbddc->new_global_mat) { 3194 PetscInt global_size,local_size; 3195 3196 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3197 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3198 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3199 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3200 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3201 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3202 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3203 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3204 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3205 } else { 3206 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3207 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3208 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3209 } 3210 if (!pcbddc->user_ChangeOfBasisMatrix) { 3211 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3212 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3213 } else { 3214 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3215 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3216 } 3217 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3218 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3219 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3220 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3221 change_ctx->pc = pc; 3222 } 3223 3224 /* add pressure dof to set of primal nodes for numbering purposes */ 3225 if (pcbddc->benign_p0_lidx >= 0) { 3226 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3227 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3228 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3229 pcbddc->local_primal_size_cc++; 3230 pcbddc->local_primal_size++; 3231 } 3232 3233 /* check if a new primal space has been introduced (also take into account benign trick) */ 3234 pcbddc->new_primal_space_local = PETSC_TRUE; 3235 if (olocal_primal_size == pcbddc->local_primal_size) { 3236 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3237 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3238 if (!pcbddc->new_primal_space_local) { 3239 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3240 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3241 } 3242 } 3243 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3244 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3245 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3246 3247 /* flush dbg viewer */ 3248 if (pcbddc->dbg_flag) { 3249 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3250 } 3251 3252 /* free workspace */ 3253 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3254 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3255 if (!pcbddc->adaptive_selection) { 3256 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3257 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3258 } else { 3259 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3260 pcbddc->adaptive_constraints_idxs_ptr, 3261 pcbddc->adaptive_constraints_data_ptr, 3262 pcbddc->adaptive_constraints_idxs, 3263 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3264 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3265 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3266 } 3267 PetscFunctionReturn(0); 3268 } 3269 3270 #undef __FUNCT__ 3271 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3272 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3273 { 3274 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3275 PC_IS *pcis = (PC_IS*)pc->data; 3276 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3277 PetscInt ierr,i,vertex_size,N; 3278 PetscViewer viewer=pcbddc->dbg_viewer; 3279 3280 PetscFunctionBegin; 3281 /* Reset previously computed graph */ 3282 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3283 /* Init local Graph struct */ 3284 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3285 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3286 3287 /* Check validity of the csr graph passed in by the user */ 3288 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3289 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3290 } 3291 3292 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3293 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3294 PetscInt *xadj,*adjncy; 3295 PetscInt nvtxs; 3296 PetscBool flg_row=PETSC_FALSE; 3297 3298 if (pcbddc->use_local_adj) { 3299 3300 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3301 if (flg_row) { 3302 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3303 pcbddc->computed_rowadj = PETSC_TRUE; 3304 } 3305 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3306 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3307 IS is_dummy; 3308 ISLocalToGlobalMapping l2gmap_dummy; 3309 PetscInt j,sum; 3310 PetscInt *cxadj,*cadjncy; 3311 const PetscInt *idxs; 3312 PCBDDCGraph graph; 3313 PetscBT is_on_boundary; 3314 3315 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3316 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3317 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3318 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3319 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3320 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3321 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3322 if (flg_row) { 3323 graph->xadj = xadj; 3324 graph->adjncy = adjncy; 3325 } 3326 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3327 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3328 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3329 3330 if (pcbddc->dbg_flag) { 3331 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3332 for (i=0;i<graph->ncc;i++) { 3333 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3334 } 3335 } 3336 3337 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3338 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3339 for (i=0;i<pcis->n_B;i++) { 3340 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3341 } 3342 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3343 3344 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3345 sum = 0; 3346 for (i=0;i<graph->ncc;i++) { 3347 PetscInt sizecc = 0; 3348 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3349 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3350 sizecc++; 3351 } 3352 } 3353 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3354 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3355 cxadj[graph->queue[j]] = sizecc; 3356 } 3357 } 3358 sum += sizecc*sizecc; 3359 } 3360 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3361 sum = 0; 3362 for (i=0;i<pcis->n;i++) { 3363 PetscInt temp = cxadj[i]; 3364 cxadj[i] = sum; 3365 sum += temp; 3366 } 3367 cxadj[pcis->n] = sum; 3368 for (i=0;i<graph->ncc;i++) { 3369 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3370 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3371 PetscInt k,sizecc = 0; 3372 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3373 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3374 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3375 sizecc++; 3376 } 3377 } 3378 } 3379 } 3380 } 3381 if (sum) { 3382 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3383 } else { 3384 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3385 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3386 } 3387 graph->xadj = 0; 3388 graph->adjncy = 0; 3389 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3390 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3391 } 3392 } 3393 if (pcbddc->dbg_flag) { 3394 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3395 } 3396 3397 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3398 vertex_size = 1; 3399 if (pcbddc->user_provided_isfordofs) { 3400 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3401 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3402 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3403 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3404 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3405 } 3406 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3407 pcbddc->n_ISForDofs = 0; 3408 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3409 } 3410 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3411 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3412 } else { 3413 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3414 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3415 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3416 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3417 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3418 } 3419 } 3420 } 3421 3422 /* Setup of Graph */ 3423 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3424 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3425 } 3426 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3427 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3428 } 3429 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3430 3431 /* Graph's connected components analysis */ 3432 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3433 3434 /* print some info to stdout */ 3435 if (pcbddc->dbg_flag) { 3436 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3437 } 3438 3439 /* mark topography has done */ 3440 pcbddc->recompute_topography = PETSC_FALSE; 3441 PetscFunctionReturn(0); 3442 } 3443 3444 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3445 #undef __FUNCT__ 3446 #define __FUNCT__ "PCBDDCSubsetNumbering" 3447 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3448 { 3449 PetscSF sf; 3450 PetscLayout map; 3451 const PetscInt *idxs; 3452 PetscInt *leaf_data,*root_data,*gidxs; 3453 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3454 PetscInt n_n,nlocals,start,first_index; 3455 PetscMPIInt commsize; 3456 PetscBool first_found; 3457 PetscErrorCode ierr; 3458 3459 PetscFunctionBegin; 3460 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3461 if (subset_mult) { 3462 PetscCheckSameComm(subset,1,subset_mult,2); 3463 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3464 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3465 } 3466 /* create workspace layout for computing global indices of subset */ 3467 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3468 lbounds[0] = lbounds[1] = 0; 3469 for (i=0;i<n;i++) { 3470 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3471 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3472 } 3473 lbounds[0] = -lbounds[0]; 3474 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3475 gbounds[0] = -gbounds[0]; 3476 N = gbounds[1] - gbounds[0] + 1; 3477 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3478 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3479 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3480 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3481 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3482 3483 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3484 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3485 if (subset_mult) { 3486 const PetscInt* idxs_mult; 3487 3488 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3489 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3490 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3491 } else { 3492 for (i=0;i<n;i++) leaf_data[i] = 1; 3493 } 3494 /* local size of new subset */ 3495 n_n = 0; 3496 for (i=0;i<n;i++) n_n += leaf_data[i]; 3497 3498 /* global indexes in layout */ 3499 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3500 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3501 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3502 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3503 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3504 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3505 3506 /* reduce from leaves to roots */ 3507 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3508 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3509 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3510 3511 /* count indexes in local part of layout */ 3512 nlocals = 0; 3513 first_index = -1; 3514 first_found = PETSC_FALSE; 3515 for (i=0;i<Nl;i++) { 3516 if (!first_found && root_data[i]) { 3517 first_found = PETSC_TRUE; 3518 first_index = i; 3519 } 3520 nlocals += root_data[i]; 3521 } 3522 3523 /* cumulative of number of indexes and size of subset without holes */ 3524 #if defined(PETSC_HAVE_MPI_EXSCAN) 3525 start = 0; 3526 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3527 #else 3528 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3529 start = start-nlocals; 3530 #endif 3531 3532 if (N_n) { /* compute total size of new subset if requested */ 3533 *N_n = start + nlocals; 3534 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3535 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3536 } 3537 3538 /* adapt root data with cumulative */ 3539 if (first_found) { 3540 PetscInt old_index; 3541 3542 root_data[first_index] += start; 3543 old_index = first_index; 3544 for (i=first_index+1;i<Nl;i++) { 3545 if (root_data[i]) { 3546 root_data[i] += root_data[old_index]; 3547 old_index = i; 3548 } 3549 } 3550 } 3551 3552 /* from roots to leaves */ 3553 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3554 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3555 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3556 3557 /* create new IS with global indexes without holes */ 3558 if (subset_mult) { 3559 const PetscInt* idxs_mult; 3560 PetscInt cum; 3561 3562 cum = 0; 3563 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3564 for (i=0;i<n;i++) { 3565 PetscInt j; 3566 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3567 } 3568 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3569 } else { 3570 for (i=0;i<n;i++) { 3571 gidxs[i] = leaf_data[i]-1; 3572 } 3573 } 3574 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3575 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3576 PetscFunctionReturn(0); 3577 } 3578 3579 #undef __FUNCT__ 3580 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3581 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3582 { 3583 PetscInt i,j; 3584 PetscScalar *alphas; 3585 PetscErrorCode ierr; 3586 3587 PetscFunctionBegin; 3588 /* this implements stabilized Gram-Schmidt */ 3589 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3590 for (i=0;i<n;i++) { 3591 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3592 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3593 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3594 } 3595 ierr = PetscFree(alphas);CHKERRQ(ierr); 3596 PetscFunctionReturn(0); 3597 } 3598 3599 #undef __FUNCT__ 3600 #define __FUNCT__ "MatISGetSubassemblingPattern" 3601 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3602 { 3603 IS ranks_send_to; 3604 PetscInt n_neighs,*neighs,*n_shared,**shared; 3605 PetscMPIInt size,rank,color; 3606 PetscInt *xadj,*adjncy; 3607 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3608 PetscInt i,local_size,threshold=0; 3609 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3610 PetscSubcomm subcomm; 3611 PetscErrorCode ierr; 3612 3613 PetscFunctionBegin; 3614 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3615 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3616 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3617 3618 /* Get info on mapping */ 3619 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3620 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3621 3622 /* build local CSR graph of subdomains' connectivity */ 3623 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3624 xadj[0] = 0; 3625 xadj[1] = PetscMax(n_neighs-1,0); 3626 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3627 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3628 3629 if (threshold) { 3630 PetscInt xadj_count = 0; 3631 for (i=1;i<n_neighs;i++) { 3632 if (n_shared[i] > threshold) { 3633 adjncy[xadj_count] = neighs[i]; 3634 adjncy_wgt[xadj_count] = n_shared[i]; 3635 xadj_count++; 3636 } 3637 } 3638 xadj[1] = xadj_count; 3639 } else { 3640 if (xadj[1]) { 3641 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3642 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3643 } 3644 } 3645 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3646 if (use_square) { 3647 for (i=0;i<xadj[1];i++) { 3648 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3649 } 3650 } 3651 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3652 3653 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3654 3655 /* 3656 Restrict work on active processes only. 3657 */ 3658 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3659 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3660 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3661 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3662 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3663 if (color) { 3664 ierr = PetscFree(xadj);CHKERRQ(ierr); 3665 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3666 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3667 } else { 3668 Mat subdomain_adj; 3669 IS new_ranks,new_ranks_contig; 3670 MatPartitioning partitioner; 3671 PetscInt prank,rstart=0,rend=0; 3672 PetscInt *is_indices,*oldranks; 3673 PetscBool aggregate; 3674 3675 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3676 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3677 prank = rank; 3678 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3679 /* 3680 for (i=0;i<size;i++) { 3681 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3682 } 3683 */ 3684 for (i=0;i<xadj[1];i++) { 3685 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3686 } 3687 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3688 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3689 if (aggregate) { 3690 PetscInt lrows,row,ncols,*cols; 3691 PetscMPIInt nrank; 3692 PetscScalar *vals; 3693 3694 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3695 lrows = 0; 3696 if (nrank<redprocs) { 3697 lrows = size/redprocs; 3698 if (nrank<size%redprocs) lrows++; 3699 } 3700 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3701 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3702 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3703 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3704 row = nrank; 3705 ncols = xadj[1]-xadj[0]; 3706 cols = adjncy; 3707 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3708 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3709 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3710 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3712 ierr = PetscFree(xadj);CHKERRQ(ierr); 3713 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3714 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3715 ierr = PetscFree(vals);CHKERRQ(ierr); 3716 } else { 3717 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3718 } 3719 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3720 3721 /* Partition */ 3722 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3723 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3724 if (use_vwgt) { 3725 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3726 v_wgt[0] = local_size; 3727 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3728 } 3729 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3730 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3731 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3732 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3733 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3734 3735 /* renumber new_ranks to avoid "holes" in new set of processors */ 3736 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3737 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3738 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3739 if (!redprocs) { 3740 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3741 } else { 3742 PetscInt idxs[1]; 3743 PetscMPIInt tag; 3744 MPI_Request *reqs; 3745 3746 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3747 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3748 for (i=rstart;i<rend;i++) { 3749 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3750 } 3751 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3752 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3753 ierr = PetscFree(reqs);CHKERRQ(ierr); 3754 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3755 } 3756 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3757 /* clean up */ 3758 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3759 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3760 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3761 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3762 } 3763 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3764 3765 /* assemble parallel IS for sends */ 3766 i = 1; 3767 if (color) i=0; 3768 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3769 /* get back IS */ 3770 *is_sends = ranks_send_to; 3771 PetscFunctionReturn(0); 3772 } 3773 3774 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3775 3776 #undef __FUNCT__ 3777 #define __FUNCT__ "MatISSubassemble" 3778 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3779 { 3780 Mat local_mat; 3781 IS is_sends_internal; 3782 PetscInt rows,cols,new_local_rows; 3783 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3784 PetscBool ismatis,isdense,newisdense,destroy_mat; 3785 ISLocalToGlobalMapping l2gmap; 3786 PetscInt* l2gmap_indices; 3787 const PetscInt* is_indices; 3788 MatType new_local_type; 3789 /* buffers */ 3790 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3791 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3792 PetscInt *recv_buffer_idxs_local; 3793 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3794 /* MPI */ 3795 MPI_Comm comm,comm_n; 3796 PetscSubcomm subcomm; 3797 PetscMPIInt n_sends,n_recvs,commsize; 3798 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3799 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3800 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3801 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3802 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3803 PetscErrorCode ierr; 3804 3805 PetscFunctionBegin; 3806 /* TODO: add missing checks */ 3807 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3808 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3809 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3810 PetscValidLogicalCollectiveInt(mat,nis,7); 3811 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3812 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3813 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3814 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3815 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3816 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3817 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3818 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3819 PetscInt mrows,mcols,mnrows,mncols; 3820 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3821 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3822 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3823 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3824 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3825 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3826 } 3827 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3828 PetscValidLogicalCollectiveInt(mat,bs,0); 3829 /* prepare IS for sending if not provided */ 3830 if (!is_sends) { 3831 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3832 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3833 } else { 3834 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3835 is_sends_internal = is_sends; 3836 } 3837 3838 /* get comm */ 3839 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3840 3841 /* compute number of sends */ 3842 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3843 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3844 3845 /* compute number of receives */ 3846 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3847 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3848 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3849 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3850 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3851 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3852 ierr = PetscFree(iflags);CHKERRQ(ierr); 3853 3854 /* restrict comm if requested */ 3855 subcomm = 0; 3856 destroy_mat = PETSC_FALSE; 3857 if (restrict_comm) { 3858 PetscMPIInt color,subcommsize; 3859 3860 color = 0; 3861 if (restrict_full) { 3862 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3863 } else { 3864 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3865 } 3866 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3867 subcommsize = commsize - subcommsize; 3868 /* check if reuse has been requested */ 3869 if (reuse == MAT_REUSE_MATRIX) { 3870 if (*mat_n) { 3871 PetscMPIInt subcommsize2; 3872 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3873 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3874 comm_n = PetscObjectComm((PetscObject)*mat_n); 3875 } else { 3876 comm_n = PETSC_COMM_SELF; 3877 } 3878 } else { /* MAT_INITIAL_MATRIX */ 3879 PetscMPIInt rank; 3880 3881 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3882 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3883 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3884 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3885 comm_n = PetscSubcommChild(subcomm); 3886 } 3887 /* flag to destroy *mat_n if not significative */ 3888 if (color) destroy_mat = PETSC_TRUE; 3889 } else { 3890 comm_n = comm; 3891 } 3892 3893 /* prepare send/receive buffers */ 3894 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3895 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3896 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3897 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3898 if (nis) { 3899 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3900 } 3901 3902 /* Get data from local matrices */ 3903 if (!isdense) { 3904 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3905 /* TODO: See below some guidelines on how to prepare the local buffers */ 3906 /* 3907 send_buffer_vals should contain the raw values of the local matrix 3908 send_buffer_idxs should contain: 3909 - MatType_PRIVATE type 3910 - PetscInt size_of_l2gmap 3911 - PetscInt global_row_indices[size_of_l2gmap] 3912 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3913 */ 3914 } else { 3915 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3916 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3917 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3918 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3919 send_buffer_idxs[1] = i; 3920 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3921 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3922 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3923 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3924 for (i=0;i<n_sends;i++) { 3925 ilengths_vals[is_indices[i]] = len*len; 3926 ilengths_idxs[is_indices[i]] = len+2; 3927 } 3928 } 3929 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3930 /* additional is (if any) */ 3931 if (nis) { 3932 PetscMPIInt psum; 3933 PetscInt j; 3934 for (j=0,psum=0;j<nis;j++) { 3935 PetscInt plen; 3936 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3937 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3938 psum += len+1; /* indices + lenght */ 3939 } 3940 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3941 for (j=0,psum=0;j<nis;j++) { 3942 PetscInt plen; 3943 const PetscInt *is_array_idxs; 3944 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3945 send_buffer_idxs_is[psum] = plen; 3946 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3947 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3948 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3949 psum += plen+1; /* indices + lenght */ 3950 } 3951 for (i=0;i<n_sends;i++) { 3952 ilengths_idxs_is[is_indices[i]] = psum; 3953 } 3954 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3955 } 3956 3957 buf_size_idxs = 0; 3958 buf_size_vals = 0; 3959 buf_size_idxs_is = 0; 3960 for (i=0;i<n_recvs;i++) { 3961 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3962 buf_size_vals += (PetscInt)olengths_vals[i]; 3963 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3964 } 3965 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3966 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3967 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3968 3969 /* get new tags for clean communications */ 3970 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3971 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3972 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3973 3974 /* allocate for requests */ 3975 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3976 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3977 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3978 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3979 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3980 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3981 3982 /* communications */ 3983 ptr_idxs = recv_buffer_idxs; 3984 ptr_vals = recv_buffer_vals; 3985 ptr_idxs_is = recv_buffer_idxs_is; 3986 for (i=0;i<n_recvs;i++) { 3987 source_dest = onodes[i]; 3988 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3989 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3990 ptr_idxs += olengths_idxs[i]; 3991 ptr_vals += olengths_vals[i]; 3992 if (nis) { 3993 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3994 ptr_idxs_is += olengths_idxs_is[i]; 3995 } 3996 } 3997 for (i=0;i<n_sends;i++) { 3998 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3999 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4000 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4001 if (nis) { 4002 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4003 } 4004 } 4005 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4006 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4007 4008 /* assemble new l2g map */ 4009 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4010 ptr_idxs = recv_buffer_idxs; 4011 new_local_rows = 0; 4012 for (i=0;i<n_recvs;i++) { 4013 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4014 ptr_idxs += olengths_idxs[i]; 4015 } 4016 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4017 ptr_idxs = recv_buffer_idxs; 4018 new_local_rows = 0; 4019 for (i=0;i<n_recvs;i++) { 4020 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4021 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4022 ptr_idxs += olengths_idxs[i]; 4023 } 4024 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4025 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4026 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4027 4028 /* infer new local matrix type from received local matrices type */ 4029 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4030 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4031 if (n_recvs) { 4032 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4033 ptr_idxs = recv_buffer_idxs; 4034 for (i=0;i<n_recvs;i++) { 4035 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4036 new_local_type_private = MATAIJ_PRIVATE; 4037 break; 4038 } 4039 ptr_idxs += olengths_idxs[i]; 4040 } 4041 switch (new_local_type_private) { 4042 case MATDENSE_PRIVATE: 4043 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4044 new_local_type = MATSEQAIJ; 4045 bs = 1; 4046 } else { /* if I receive only 1 dense matrix */ 4047 new_local_type = MATSEQDENSE; 4048 bs = 1; 4049 } 4050 break; 4051 case MATAIJ_PRIVATE: 4052 new_local_type = MATSEQAIJ; 4053 bs = 1; 4054 break; 4055 case MATBAIJ_PRIVATE: 4056 new_local_type = MATSEQBAIJ; 4057 break; 4058 case MATSBAIJ_PRIVATE: 4059 new_local_type = MATSEQSBAIJ; 4060 break; 4061 default: 4062 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4063 break; 4064 } 4065 } else { /* by default, new_local_type is seqdense */ 4066 new_local_type = MATSEQDENSE; 4067 bs = 1; 4068 } 4069 4070 /* create MATIS object if needed */ 4071 if (reuse == MAT_INITIAL_MATRIX) { 4072 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4073 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4074 } else { 4075 /* it also destroys the local matrices */ 4076 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4077 } 4078 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4079 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4080 4081 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4082 4083 /* Global to local map of received indices */ 4084 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4085 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4086 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4087 4088 /* restore attributes -> type of incoming data and its size */ 4089 buf_size_idxs = 0; 4090 for (i=0;i<n_recvs;i++) { 4091 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4092 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4093 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4094 } 4095 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4096 4097 /* set preallocation */ 4098 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4099 if (!newisdense) { 4100 PetscInt *new_local_nnz=0; 4101 4102 ptr_vals = recv_buffer_vals; 4103 ptr_idxs = recv_buffer_idxs_local; 4104 if (n_recvs) { 4105 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4106 } 4107 for (i=0;i<n_recvs;i++) { 4108 PetscInt j; 4109 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4110 for (j=0;j<*(ptr_idxs+1);j++) { 4111 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4112 } 4113 } else { 4114 /* TODO */ 4115 } 4116 ptr_idxs += olengths_idxs[i]; 4117 } 4118 if (new_local_nnz) { 4119 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4120 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4121 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4122 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4123 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4124 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4125 } else { 4126 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4127 } 4128 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4129 } else { 4130 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4131 } 4132 4133 /* set values */ 4134 ptr_vals = recv_buffer_vals; 4135 ptr_idxs = recv_buffer_idxs_local; 4136 for (i=0;i<n_recvs;i++) { 4137 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4138 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4139 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4140 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4141 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4142 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4143 } else { 4144 /* TODO */ 4145 } 4146 ptr_idxs += olengths_idxs[i]; 4147 ptr_vals += olengths_vals[i]; 4148 } 4149 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4150 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4151 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4152 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4153 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4154 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4155 4156 #if 0 4157 if (!restrict_comm) { /* check */ 4158 Vec lvec,rvec; 4159 PetscReal infty_error; 4160 4161 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4162 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4163 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4164 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4165 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4166 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4167 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4168 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4169 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4170 } 4171 #endif 4172 4173 /* assemble new additional is (if any) */ 4174 if (nis) { 4175 PetscInt **temp_idxs,*count_is,j,psum; 4176 4177 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4178 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4179 ptr_idxs = recv_buffer_idxs_is; 4180 psum = 0; 4181 for (i=0;i<n_recvs;i++) { 4182 for (j=0;j<nis;j++) { 4183 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4184 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4185 psum += plen; 4186 ptr_idxs += plen+1; /* shift pointer to received data */ 4187 } 4188 } 4189 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4190 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4191 for (i=1;i<nis;i++) { 4192 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4193 } 4194 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4195 ptr_idxs = recv_buffer_idxs_is; 4196 for (i=0;i<n_recvs;i++) { 4197 for (j=0;j<nis;j++) { 4198 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4199 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4200 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4201 ptr_idxs += plen+1; /* shift pointer to received data */ 4202 } 4203 } 4204 for (i=0;i<nis;i++) { 4205 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4206 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4207 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4208 } 4209 ierr = PetscFree(count_is);CHKERRQ(ierr); 4210 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4211 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4212 } 4213 /* free workspace */ 4214 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4215 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4216 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4217 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4218 if (isdense) { 4219 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4220 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4221 } else { 4222 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4223 } 4224 if (nis) { 4225 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4226 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4227 } 4228 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4229 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4230 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4231 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4232 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4233 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4234 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4235 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4236 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4237 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4238 ierr = PetscFree(onodes);CHKERRQ(ierr); 4239 if (nis) { 4240 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4241 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4242 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4243 } 4244 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4245 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4246 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4247 for (i=0;i<nis;i++) { 4248 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4249 } 4250 *mat_n = NULL; 4251 } 4252 PetscFunctionReturn(0); 4253 } 4254 4255 /* temporary hack into ksp private data structure */ 4256 #include <petsc/private/kspimpl.h> 4257 4258 #undef __FUNCT__ 4259 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4260 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4261 { 4262 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4263 PC_IS *pcis = (PC_IS*)pc->data; 4264 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4265 MatNullSpace CoarseNullSpace=NULL; 4266 ISLocalToGlobalMapping coarse_islg; 4267 IS coarse_is,*isarray; 4268 PetscInt i,im_active=-1,active_procs=-1; 4269 PetscInt nis,nisdofs,nisneu; 4270 PC pc_temp; 4271 PCType coarse_pc_type; 4272 KSPType coarse_ksp_type; 4273 PetscBool multilevel_requested,multilevel_allowed; 4274 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4275 Mat t_coarse_mat_is; 4276 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4277 PetscMPIInt all_procs; 4278 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4279 PetscBool compute_vecs = PETSC_FALSE; 4280 PetscScalar *array; 4281 PetscErrorCode ierr; 4282 4283 PetscFunctionBegin; 4284 /* Assign global numbering to coarse dofs */ 4285 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4286 PetscInt ocoarse_size; 4287 compute_vecs = PETSC_TRUE; 4288 ocoarse_size = pcbddc->coarse_size; 4289 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4290 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4291 /* see if we can avoid some work */ 4292 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4293 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4294 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4295 PC pc; 4296 PetscBool isbddc; 4297 4298 /* temporary workaround since PCBDDC does not have a reset method so far */ 4299 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4300 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4301 if (isbddc) { 4302 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4303 } 4304 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4305 coarse_reuse = PETSC_FALSE; 4306 } else { /* we can safely reuse already computed coarse matrix */ 4307 coarse_reuse = PETSC_TRUE; 4308 } 4309 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4310 coarse_reuse = PETSC_FALSE; 4311 } 4312 /* reset any subassembling information */ 4313 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4314 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4315 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4316 coarse_reuse = PETSC_TRUE; 4317 } 4318 4319 /* count "active" (i.e. with positive local size) and "void" processes */ 4320 im_active = !!(pcis->n); 4321 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4322 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4323 void_procs = all_procs-active_procs; 4324 csin_type_simple = PETSC_TRUE; 4325 redist = PETSC_FALSE; 4326 if (pcbddc->current_level && void_procs) { 4327 csin_ml = PETSC_TRUE; 4328 ncoarse_ml = void_procs; 4329 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4330 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4331 csin_ds = PETSC_TRUE; 4332 ncoarse_ds = pcbddc->redistribute_coarse; 4333 redist = PETSC_TRUE; 4334 } else { 4335 csin_ds = PETSC_TRUE; 4336 ncoarse_ds = active_procs; 4337 redist = PETSC_TRUE; 4338 } 4339 } else { 4340 csin_ml = PETSC_FALSE; 4341 ncoarse_ml = all_procs; 4342 if (void_procs) { 4343 csin_ds = PETSC_TRUE; 4344 ncoarse_ds = void_procs; 4345 csin_type_simple = PETSC_FALSE; 4346 } else { 4347 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4348 csin_ds = PETSC_TRUE; 4349 ncoarse_ds = pcbddc->redistribute_coarse; 4350 redist = PETSC_TRUE; 4351 } else { 4352 csin_ds = PETSC_FALSE; 4353 ncoarse_ds = all_procs; 4354 } 4355 } 4356 } 4357 4358 /* 4359 test if we can go multilevel: three conditions must be satisfied: 4360 - we have not exceeded the number of levels requested 4361 - we can actually subassemble the active processes 4362 - we can find a suitable number of MPI processes where we can place the subassembled problem 4363 */ 4364 multilevel_allowed = PETSC_FALSE; 4365 multilevel_requested = PETSC_FALSE; 4366 if (pcbddc->current_level < pcbddc->max_levels) { 4367 multilevel_requested = PETSC_TRUE; 4368 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4369 multilevel_allowed = PETSC_FALSE; 4370 } else { 4371 multilevel_allowed = PETSC_TRUE; 4372 } 4373 } 4374 /* determine number of process partecipating to coarse solver */ 4375 if (multilevel_allowed) { 4376 ncoarse = ncoarse_ml; 4377 csin = csin_ml; 4378 redist = PETSC_FALSE; 4379 } else { 4380 ncoarse = ncoarse_ds; 4381 csin = csin_ds; 4382 } 4383 4384 /* creates temporary l2gmap and IS for coarse indexes */ 4385 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4386 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4387 4388 /* creates temporary MATIS object for coarse matrix */ 4389 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4390 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4391 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4392 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4393 #if 0 4394 { 4395 PetscViewer viewer; 4396 char filename[256]; 4397 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4398 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4399 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4400 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4401 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4402 } 4403 #endif 4404 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4405 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4406 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4407 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4408 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4409 4410 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4411 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4412 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4413 const PetscInt *idxs; 4414 ISLocalToGlobalMapping tmap; 4415 4416 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4417 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4418 /* allocate space for temporary storage */ 4419 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4420 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4421 /* allocate for IS array */ 4422 nisdofs = pcbddc->n_ISForDofsLocal; 4423 nisneu = !!pcbddc->NeumannBoundariesLocal; 4424 nis = nisdofs + nisneu; 4425 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4426 /* dofs splitting */ 4427 for (i=0;i<nisdofs;i++) { 4428 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4429 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4430 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4431 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4432 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4433 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4434 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4435 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4436 } 4437 /* neumann boundaries */ 4438 if (pcbddc->NeumannBoundariesLocal) { 4439 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4440 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4441 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4442 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4443 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4444 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4445 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4446 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4447 } 4448 /* free memory */ 4449 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4450 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4451 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4452 } else { 4453 nis = 0; 4454 nisdofs = 0; 4455 nisneu = 0; 4456 isarray = NULL; 4457 } 4458 /* destroy no longer needed map */ 4459 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4460 4461 /* restrict on coarse candidates (if needed) */ 4462 coarse_mat_is = NULL; 4463 if (csin) { 4464 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4465 if (redist) { 4466 PetscMPIInt rank; 4467 PetscInt spc,n_spc_p1,dest[1],destsize; 4468 4469 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4470 spc = active_procs/ncoarse; 4471 n_spc_p1 = active_procs%ncoarse; 4472 if (im_active) { 4473 destsize = 1; 4474 if (rank > n_spc_p1*(spc+1)-1) { 4475 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4476 } else { 4477 dest[0] = rank/(spc+1); 4478 } 4479 } else { 4480 destsize = 0; 4481 } 4482 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4483 } else if (csin_type_simple) { 4484 PetscMPIInt rank; 4485 PetscInt issize,isidx; 4486 4487 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4488 if (im_active) { 4489 issize = 1; 4490 isidx = (PetscInt)rank; 4491 } else { 4492 issize = 0; 4493 isidx = -1; 4494 } 4495 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4496 } else { /* get a suitable subassembling pattern from MATIS code */ 4497 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4498 } 4499 4500 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4501 if (!redist || ncoarse <= void_procs) { 4502 PetscInt ncoarse_cand,tissize,*nisindices; 4503 PetscInt *coarse_candidates; 4504 const PetscInt* tisindices; 4505 4506 /* get coarse candidates' ranks in pc communicator */ 4507 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4508 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4509 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4510 if (!coarse_candidates[i]) { 4511 coarse_candidates[ncoarse_cand++]=i; 4512 } 4513 } 4514 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4515 4516 4517 if (pcbddc->dbg_flag) { 4518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4519 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4520 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4521 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4522 for (i=0;i<ncoarse_cand;i++) { 4523 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4524 } 4525 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4526 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4527 } 4528 /* shift the pattern on coarse candidates */ 4529 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4530 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4531 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4532 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4533 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4534 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4535 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4536 } 4537 if (pcbddc->dbg_flag) { 4538 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4539 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4540 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4541 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4542 } 4543 } 4544 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4545 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4546 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4547 } else { /* this is the last level, so use just receiving processes in subcomm */ 4548 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4549 } 4550 } else { 4551 if (pcbddc->dbg_flag) { 4552 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4553 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4554 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4555 } 4556 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4557 coarse_mat_is = t_coarse_mat_is; 4558 } 4559 4560 /* create local to global scatters for coarse problem */ 4561 if (compute_vecs) { 4562 PetscInt lrows; 4563 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4564 if (coarse_mat_is) { 4565 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4566 } else { 4567 lrows = 0; 4568 } 4569 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4570 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4571 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4572 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4573 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4574 } 4575 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4576 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4577 4578 /* set defaults for coarse KSP and PC */ 4579 if (multilevel_allowed) { 4580 coarse_ksp_type = KSPRICHARDSON; 4581 coarse_pc_type = PCBDDC; 4582 } else { 4583 coarse_ksp_type = KSPPREONLY; 4584 coarse_pc_type = PCREDUNDANT; 4585 } 4586 4587 /* print some info if requested */ 4588 if (pcbddc->dbg_flag) { 4589 if (!multilevel_allowed) { 4590 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4591 if (multilevel_requested) { 4592 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4593 } else if (pcbddc->max_levels) { 4594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4595 } 4596 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4597 } 4598 } 4599 4600 /* create the coarse KSP object only once with defaults */ 4601 if (coarse_mat_is) { 4602 MatReuse coarse_mat_reuse; 4603 PetscViewer dbg_viewer = NULL; 4604 if (pcbddc->dbg_flag) { 4605 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4606 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4607 } 4608 if (!pcbddc->coarse_ksp) { 4609 char prefix[256],str_level[16]; 4610 size_t len; 4611 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4612 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4613 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4614 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4615 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4616 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4617 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4618 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4619 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4620 /* prefix */ 4621 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4622 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4623 if (!pcbddc->current_level) { 4624 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4625 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4626 } else { 4627 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4628 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4629 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4630 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4631 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4632 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4633 } 4634 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4635 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4636 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4637 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4638 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4639 /* allow user customization */ 4640 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4641 } 4642 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4643 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4644 if (nisdofs) { 4645 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4646 for (i=0;i<nisdofs;i++) { 4647 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4648 } 4649 } 4650 if (nisneu) { 4651 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4652 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4653 } 4654 4655 /* get some info after set from options */ 4656 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4657 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4658 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4659 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4660 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4661 isbddc = PETSC_FALSE; 4662 } 4663 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4664 if (isredundant) { 4665 KSP inner_ksp; 4666 PC inner_pc; 4667 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4668 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4669 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4670 } 4671 4672 /* assemble coarse matrix */ 4673 if (coarse_reuse) { 4674 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4675 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4676 coarse_mat_reuse = MAT_REUSE_MATRIX; 4677 } else { 4678 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4679 } 4680 if (isbddc || isnn) { 4681 if (pcbddc->coarsening_ratio > 1) { 4682 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4683 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4684 if (pcbddc->dbg_flag) { 4685 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4686 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4687 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4688 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4689 } 4690 } 4691 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4692 } else { 4693 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4694 coarse_mat = coarse_mat_is; 4695 } 4696 } else { 4697 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4698 } 4699 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4700 4701 /* propagate symmetry info of coarse matrix */ 4702 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4703 if (pc->pmat->symmetric_set) { 4704 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4705 } 4706 if (pc->pmat->hermitian_set) { 4707 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4708 } 4709 if (pc->pmat->spd_set) { 4710 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4711 } 4712 /* set operators */ 4713 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4714 if (pcbddc->dbg_flag) { 4715 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4716 } 4717 } else { /* processes non partecipating to coarse solver (if any) */ 4718 coarse_mat = 0; 4719 } 4720 ierr = PetscFree(isarray);CHKERRQ(ierr); 4721 #if 0 4722 { 4723 PetscViewer viewer; 4724 char filename[256]; 4725 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 4726 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 4727 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4728 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4729 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4730 } 4731 #endif 4732 4733 /* Compute coarse null space (special handling by BDDC only) */ 4734 #if 0 4735 if (pcbddc->NullSpace) { 4736 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4737 } 4738 #endif 4739 4740 if (pcbddc->coarse_ksp) { 4741 Vec crhs,csol; 4742 PetscBool ispreonly; 4743 4744 if (CoarseNullSpace) { 4745 if (isbddc) { 4746 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4747 } else { 4748 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4749 } 4750 } 4751 /* setup coarse ksp */ 4752 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4753 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4754 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4755 /* hack */ 4756 if (!csol) { 4757 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4758 } 4759 if (!crhs) { 4760 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4761 } 4762 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4763 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4764 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4765 KSP check_ksp; 4766 KSPType check_ksp_type; 4767 PC check_pc; 4768 Vec check_vec,coarse_vec; 4769 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4770 PetscInt its; 4771 PetscBool compute_eigs; 4772 PetscReal *eigs_r,*eigs_c; 4773 PetscInt neigs; 4774 const char *prefix; 4775 4776 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4777 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4778 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4779 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4780 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4781 if (ispreonly) { 4782 check_ksp_type = KSPPREONLY; 4783 compute_eigs = PETSC_FALSE; 4784 } else { 4785 check_ksp_type = KSPGMRES; 4786 compute_eigs = PETSC_TRUE; 4787 } 4788 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4789 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4790 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4791 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4792 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4793 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4794 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4795 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4796 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4797 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4798 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4799 /* create random vec */ 4800 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4801 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4802 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4803 if (CoarseNullSpace) { 4804 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4805 } 4806 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4807 /* solve coarse problem */ 4808 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4809 if (CoarseNullSpace) { 4810 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4811 } 4812 /* set eigenvalue estimation if preonly has not been requested */ 4813 if (compute_eigs) { 4814 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4815 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4816 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4817 lambda_max = eigs_r[neigs-1]; 4818 lambda_min = eigs_r[0]; 4819 if (pcbddc->use_coarse_estimates) { 4820 if (lambda_max>lambda_min) { 4821 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4822 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4823 } 4824 } 4825 } 4826 4827 /* check coarse problem residual error */ 4828 if (pcbddc->dbg_flag) { 4829 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4830 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4831 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4832 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4833 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4834 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4835 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4836 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4837 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4838 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4839 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4840 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4841 if (compute_eigs) { 4842 PetscReal lambda_max_s,lambda_min_s; 4843 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4844 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4845 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4846 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4847 for (i=0;i<neigs;i++) { 4848 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4849 } 4850 } 4851 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4852 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4853 } 4854 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4855 if (compute_eigs) { 4856 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4857 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4858 } 4859 } 4860 } 4861 /* print additional info */ 4862 if (pcbddc->dbg_flag) { 4863 /* waits until all processes reaches this point */ 4864 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4865 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4866 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4867 } 4868 4869 /* free memory */ 4870 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4871 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4872 PetscFunctionReturn(0); 4873 } 4874 4875 #undef __FUNCT__ 4876 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4877 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4878 { 4879 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4880 PC_IS* pcis = (PC_IS*)pc->data; 4881 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4882 IS subset,subset_mult,subset_n; 4883 PetscInt local_size,coarse_size=0; 4884 PetscInt *local_primal_indices=NULL; 4885 const PetscInt *t_local_primal_indices; 4886 PetscErrorCode ierr; 4887 4888 PetscFunctionBegin; 4889 /* Compute global number of coarse dofs */ 4890 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4891 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4892 } 4893 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4894 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4895 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4896 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4897 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4898 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4899 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4900 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4901 if (local_size != pcbddc->local_primal_size) { 4902 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4903 } 4904 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4905 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4906 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4907 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4908 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4909 4910 /* check numbering */ 4911 if (pcbddc->dbg_flag) { 4912 PetscScalar coarsesum,*array,*array2; 4913 PetscInt i; 4914 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4915 4916 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4917 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4918 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4919 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4920 /* counter */ 4921 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4922 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4923 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4924 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4925 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4926 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4927 4928 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4929 for (i=0;i<pcbddc->local_primal_size;i++) { 4930 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4931 } 4932 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4933 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4934 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4935 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4936 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4937 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4938 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4939 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4940 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4941 for (i=0;i<pcis->n;i++) { 4942 if (array[i] != 0.0 && array[i] != array2[i]) { 4943 PetscInt owned = (PetscInt)(array[i]); 4944 PetscInt neigh = (PetscInt)(array2[i]); 4945 set_error = PETSC_TRUE; 4946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4947 } 4948 } 4949 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4950 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4951 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4952 for (i=0;i<pcis->n;i++) { 4953 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4954 } 4955 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4956 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4957 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4958 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4959 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4960 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4961 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4962 PetscInt *gidxs; 4963 4964 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4965 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4966 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4968 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4969 for (i=0;i<pcbddc->local_primal_size;i++) { 4970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4971 } 4972 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4973 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4974 } 4975 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4976 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4977 } 4978 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4979 /* get back data */ 4980 *coarse_size_n = coarse_size; 4981 *local_primal_indices_n = local_primal_indices; 4982 PetscFunctionReturn(0); 4983 } 4984 4985 #undef __FUNCT__ 4986 #define __FUNCT__ "PCBDDCGlobalToLocal" 4987 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4988 { 4989 IS localis_t; 4990 PetscInt i,lsize,*idxs,n; 4991 PetscScalar *vals; 4992 PetscErrorCode ierr; 4993 4994 PetscFunctionBegin; 4995 /* get indices in local ordering exploiting local to global map */ 4996 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4997 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4998 for (i=0;i<lsize;i++) vals[i] = 1.0; 4999 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5000 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5001 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5002 if (idxs) { /* multilevel guard */ 5003 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5004 } 5005 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5006 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5007 ierr = PetscFree(vals);CHKERRQ(ierr); 5008 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5009 /* now compute set in local ordering */ 5010 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5011 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5012 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5013 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5014 for (i=0,lsize=0;i<n;i++) { 5015 if (PetscRealPart(vals[i]) > 0.5) { 5016 lsize++; 5017 } 5018 } 5019 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5020 for (i=0,lsize=0;i<n;i++) { 5021 if (PetscRealPart(vals[i]) > 0.5) { 5022 idxs[lsize++] = i; 5023 } 5024 } 5025 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5026 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5027 *localis = localis_t; 5028 PetscFunctionReturn(0); 5029 } 5030 5031 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5032 #undef __FUNCT__ 5033 #define __FUNCT__ "PCBDDCMatMult_Private" 5034 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5035 { 5036 PCBDDCChange_ctx change_ctx; 5037 PC_IS *pcis; 5038 PC_BDDC *pcbddc; 5039 PetscErrorCode ierr; 5040 5041 PetscFunctionBegin; 5042 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5043 pcis = (PC_IS*)(change_ctx->pc->data); 5044 pcbddc = (PC_BDDC*)(change_ctx->pc->data); 5045 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5046 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5047 if (pcbddc->benign_saddle_point) { 5048 Mat_IS* matis = (Mat_IS*)(change_ctx->original_mat->data); 5049 ierr = VecScatterBegin(matis->rctx,change_ctx->work[1],matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5050 ierr = VecScatterEnd(matis->rctx,change_ctx->work[1],matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5051 5052 if (pcbddc->benign_p0_lidx >=0) { 5053 Mat B0; 5054 Vec dummy_vec; 5055 PetscScalar *array; 5056 PetscInt ii[2]; 5057 5058 ii[0] = 0; 5059 ii[1] = pcbddc->B0_ncol; 5060 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 5061 ierr = MatCreateVecs(B0,NULL,&dummy_vec);CHKERRQ(ierr); 5062 ierr = MatMult(B0,matis->x,dummy_vec);CHKERRQ(ierr); 5063 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5064 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] benign? %1.4e\n",PetscGlobalRank,array[0]);CHKERRQ(ierr); 5065 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5066 ierr = MatDestroy(&B0);CHKERRQ(ierr); 5067 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5068 } 5069 } 5070 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5071 PetscFunctionReturn(0); 5072 } 5073 5074 #undef __FUNCT__ 5075 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5076 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5077 { 5078 PCBDDCChange_ctx change_ctx; 5079 PetscErrorCode ierr; 5080 5081 PetscFunctionBegin; 5082 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5083 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5084 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5085 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5086 PetscFunctionReturn(0); 5087 } 5088 5089 #undef __FUNCT__ 5090 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5091 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5092 { 5093 PC_IS *pcis=(PC_IS*)pc->data; 5094 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5095 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5096 Mat S_j; 5097 PetscInt *used_xadj,*used_adjncy; 5098 PetscBool free_used_adj; 5099 PetscErrorCode ierr; 5100 5101 PetscFunctionBegin; 5102 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5103 free_used_adj = PETSC_FALSE; 5104 if (pcbddc->sub_schurs_layers == -1) { 5105 used_xadj = NULL; 5106 used_adjncy = NULL; 5107 } else { 5108 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5109 used_xadj = pcbddc->mat_graph->xadj; 5110 used_adjncy = pcbddc->mat_graph->adjncy; 5111 } else if (pcbddc->computed_rowadj) { 5112 used_xadj = pcbddc->mat_graph->xadj; 5113 used_adjncy = pcbddc->mat_graph->adjncy; 5114 } else { 5115 PetscBool flg_row=PETSC_FALSE; 5116 const PetscInt *xadj,*adjncy; 5117 PetscInt nvtxs; 5118 5119 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5120 if (flg_row) { 5121 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5122 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5123 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5124 free_used_adj = PETSC_TRUE; 5125 } else { 5126 pcbddc->sub_schurs_layers = -1; 5127 used_xadj = NULL; 5128 used_adjncy = NULL; 5129 } 5130 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5131 } 5132 } 5133 5134 /* setup sub_schurs data */ 5135 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5136 if (!sub_schurs->use_mumps) { 5137 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5138 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5139 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5140 } else { 5141 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5142 PetscBool isseqaij; 5143 if (!pcbddc->use_vertices && reuse_solvers) { 5144 PetscInt n_vertices; 5145 5146 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5147 reuse_solvers = (PetscBool)!n_vertices; 5148 } 5149 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5150 if (!isseqaij) { 5151 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5152 if (matis->A == pcbddc->local_mat) { 5153 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5154 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5155 } else { 5156 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5157 } 5158 } 5159 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5160 } 5161 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5162 5163 /* free adjacency */ 5164 if (free_used_adj) { 5165 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5166 } 5167 PetscFunctionReturn(0); 5168 } 5169 5170 #undef __FUNCT__ 5171 #define __FUNCT__ "PCBDDCInitSubSchurs" 5172 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5173 { 5174 PC_IS *pcis=(PC_IS*)pc->data; 5175 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5176 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5177 PCBDDCGraph graph; 5178 PetscErrorCode ierr; 5179 5180 PetscFunctionBegin; 5181 /* attach interface graph for determining subsets */ 5182 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5183 IS verticesIS,verticescomm; 5184 PetscInt vsize,*idxs; 5185 5186 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5187 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5188 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5189 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5190 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5191 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5192 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5193 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5194 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5195 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5196 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5197 /* 5198 if (pcbddc->dbg_flag) { 5199 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5200 } 5201 */ 5202 } else { 5203 graph = pcbddc->mat_graph; 5204 } 5205 5206 /* sub_schurs init */ 5207 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5208 5209 /* free graph struct */ 5210 if (pcbddc->sub_schurs_rebuild) { 5211 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5212 } 5213 PetscFunctionReturn(0); 5214 } 5215