1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0" 10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 if (!pcbddc->benign_sf) { 17 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 18 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 19 } 20 if (pop) { /* use SF to get values */ 21 PetscScalar *array; 22 23 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 24 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 25 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 26 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 27 } else { /* use VecSetValue */ 28 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 29 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 30 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNCT__ 36 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 37 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 38 { 39 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 40 PetscErrorCode ierr; 41 42 PetscFunctionBegin; 43 /* TODO: add error checking 44 - avoid nested pop (or push) calls. 45 - cannot push before pop. 46 */ 47 if (!pcbddc->zerodiag) { 48 PetscFunctionReturn(0); 49 } 50 if (pop) { 51 const PetscInt *cB0_cols; 52 PetscInt cB0_ncol; 53 const PetscScalar *cB0_vals; 54 55 /* extract B_0 */ 56 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 57 pcbddc->B0_ncol = cB0_ncol; 58 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 59 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 60 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 61 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 62 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 63 /* remove rows and cols from local problem */ 64 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 65 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 66 } else { /* push */ 67 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 68 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 69 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 70 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 71 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 72 } 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "PCBDDCAdaptiveSelection" 78 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 79 { 80 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 81 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 82 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 83 PetscBLASInt *B_iwork,*B_ifail; 84 PetscScalar *work,lwork; 85 PetscScalar *St,*S,*eigv; 86 PetscScalar *Sarray,*Starray; 87 PetscReal *eigs,thresh; 88 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 89 PetscBool allocated_S_St; 90 #if defined(PETSC_USE_COMPLEX) 91 PetscReal *rwork; 92 #endif 93 PetscErrorCode ierr; 94 95 PetscFunctionBegin; 96 if (!sub_schurs->use_mumps) { 97 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 98 } 99 100 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 101 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 102 } 103 104 if (pcbddc->dbg_flag) { 105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 106 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 107 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 108 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 109 } 110 111 if (pcbddc->dbg_flag) { 112 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 113 } 114 115 /* max size of subsets */ 116 mss = 0; 117 for (i=0;i<sub_schurs->n_subs;i++) { 118 PetscInt subset_size; 119 120 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 121 mss = PetscMax(mss,subset_size); 122 } 123 124 /* min/max and threshold */ 125 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 126 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 127 nmax = PetscMax(nmin,nmax); 128 allocated_S_St = PETSC_FALSE; 129 if (nmin) { 130 allocated_S_St = PETSC_TRUE; 131 } 132 133 /* allocate lapack workspace */ 134 cum = cum2 = 0; 135 maxneigs = 0; 136 for (i=0;i<sub_schurs->n_subs;i++) { 137 PetscInt n,subset_size; 138 139 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 140 n = PetscMin(subset_size,nmax); 141 cum += subset_size; 142 cum2 += subset_size*n; 143 maxneigs = PetscMax(maxneigs,n); 144 } 145 if (mss) { 146 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 147 PetscBLASInt B_itype = 1; 148 PetscBLASInt B_N = mss; 149 PetscReal zero = 0.0; 150 PetscReal eps = 0.0; /* dlamch? */ 151 152 B_lwork = -1; 153 S = NULL; 154 St = NULL; 155 eigs = NULL; 156 eigv = NULL; 157 B_iwork = NULL; 158 B_ifail = NULL; 159 #if defined(PETSC_USE_COMPLEX) 160 rwork = NULL; 161 #endif 162 thresh = 1.0; 163 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 164 #if defined(PETSC_USE_COMPLEX) 165 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 166 #else 167 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 168 #endif 169 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 170 ierr = PetscFPTrapPop();CHKERRQ(ierr); 171 } else { 172 /* TODO */ 173 } 174 } else { 175 lwork = 0; 176 } 177 178 nv = 0; 179 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 180 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 181 } 182 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 183 if (allocated_S_St) { 184 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 185 } 186 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 187 #if defined(PETSC_USE_COMPLEX) 188 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 189 #endif 190 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 191 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 192 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 193 nv+cum,&pcbddc->adaptive_constraints_idxs, 194 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 195 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 196 197 maxneigs = 0; 198 cum = cum2 = cumarray = 0; 199 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 200 pcbddc->adaptive_constraints_data_ptr[0] = 0; 201 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 202 const PetscInt *idxs; 203 204 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 205 for (cum=0;cum<nv;cum++) { 206 pcbddc->adaptive_constraints_n[cum] = 1; 207 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 208 pcbddc->adaptive_constraints_data[cum] = 1.0; 209 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 210 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 211 } 212 cum2 = cum; 213 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 214 } 215 216 if (mss) { /* multilevel */ 217 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 218 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 219 } 220 221 for (i=0;i<sub_schurs->n_subs;i++) { 222 223 const PetscInt *idxs; 224 PetscReal infty = PETSC_MAX_REAL; 225 PetscInt j,subset_size,eigs_start = 0; 226 PetscBLASInt B_N; 227 PetscBool same_data = PETSC_FALSE; 228 229 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 230 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 231 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 232 if (sub_schurs->is_hermitian) { 233 PetscInt j,k; 234 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 235 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 236 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 for (j=0;j<subset_size;j++) { 239 for (k=j;k<subset_size;k++) { 240 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 241 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 242 } 243 } 244 } else { 245 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 246 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 247 } 248 } else { 249 S = Sarray + cumarray; 250 St = Starray + cumarray; 251 } 252 253 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 254 /* see if we can save some work */ 255 if (sub_schurs->n_subs == 1) { 256 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 257 } 258 259 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 260 B_neigs = 0; 261 } else { 262 /* Threshold: this is an heuristic for edges */ 263 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 264 265 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 266 PetscBLASInt B_itype = 1; 267 PetscBLASInt B_IL, B_IU; 268 PetscReal eps = -1.0; /* dlamch? */ 269 PetscInt nmin_s; 270 271 /* ask for eigenvalues larger than thresh */ 272 if (pcbddc->dbg_flag) { 273 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 274 } 275 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 276 #if defined(PETSC_USE_COMPLEX) 277 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 278 #else 279 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 280 #endif 281 ierr = PetscFPTrapPop();CHKERRQ(ierr); 282 if (B_ierr) { 283 if (B_ierr < 0 ) { 284 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 285 } else if (B_ierr <= B_N) { 286 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 287 } else { 288 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 289 } 290 } 291 292 if (B_neigs > nmax) { 293 if (pcbddc->dbg_flag) { 294 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 295 } 296 eigs_start = B_neigs -nmax; 297 B_neigs = nmax; 298 } 299 300 nmin_s = PetscMin(nmin,B_N); 301 if (B_neigs < nmin_s) { 302 PetscBLASInt B_neigs2; 303 304 B_IU = B_N - B_neigs; 305 B_IL = B_N - nmin_s + 1; 306 if (pcbddc->dbg_flag) { 307 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 308 } 309 if (sub_schurs->is_hermitian) { 310 PetscInt j; 311 for (j=0;j<subset_size;j++) { 312 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 313 } 314 for (j=0;j<subset_size;j++) { 315 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 316 } 317 } else { 318 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 319 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 320 } 321 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 322 #if defined(PETSC_USE_COMPLEX) 323 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 324 #else 325 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 326 #endif 327 ierr = PetscFPTrapPop();CHKERRQ(ierr); 328 B_neigs += B_neigs2; 329 } 330 if (B_ierr) { 331 if (B_ierr < 0 ) { 332 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 333 } else if (B_ierr <= B_N) { 334 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 335 } else { 336 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 337 } 338 } 339 if (pcbddc->dbg_flag) { 340 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 341 for (j=0;j<B_neigs;j++) { 342 if (eigs[j] == 0.0) { 343 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 344 } else { 345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 346 } 347 } 348 } 349 } else { 350 /* TODO */ 351 } 352 } 353 maxneigs = PetscMax(B_neigs,maxneigs); 354 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 355 if (B_neigs) { 356 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 357 358 if (pcbddc->dbg_flag > 1) { 359 PetscInt ii; 360 for (ii=0;ii<B_neigs;ii++) { 361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 362 for (j=0;j<B_N;j++) { 363 #if defined(PETSC_USE_COMPLEX) 364 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 365 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 366 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 367 #else 368 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 369 #endif 370 } 371 } 372 } 373 #if 0 374 for (j=0;j<B_neigs;j++) { 375 PetscBLASInt Blas_N,Blas_one = 1.0; 376 PetscScalar norm; 377 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 378 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 379 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 380 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 381 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 382 } else { 383 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 384 } 385 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 386 } 387 #endif 388 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 389 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 390 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 391 cum++; 392 } 393 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 394 /* shift for next computation */ 395 cumarray += subset_size*subset_size; 396 } 397 if (pcbddc->dbg_flag) { 398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 399 } 400 401 if (mss) { 402 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 403 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 404 /* destroy matrices (junk) */ 405 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 406 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 407 } 408 if (allocated_S_St) { 409 ierr = PetscFree2(S,St);CHKERRQ(ierr); 410 } 411 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 412 #if defined(PETSC_USE_COMPLEX) 413 ierr = PetscFree(rwork);CHKERRQ(ierr); 414 #endif 415 if (pcbddc->dbg_flag) { 416 PetscInt maxneigs_r; 417 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 418 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 419 } 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "PCBDDCSetUpSolvers" 425 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 426 { 427 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 428 PetscScalar *coarse_submat_vals; 429 PetscErrorCode ierr; 430 431 PetscFunctionBegin; 432 /* Setup local scatters R_to_B and (optionally) R_to_D */ 433 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 434 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 435 436 /* Setup local neumann solver ksp_R */ 437 /* PCBDDCSetUpLocalScatters should be called first! */ 438 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 439 440 /* Change global null space passed in by the user if change of basis has been requested */ 441 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 442 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 443 } 444 445 /* 446 Setup local correction and local part of coarse basis. 447 Gives back the dense local part of the coarse matrix in column major ordering 448 */ 449 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 450 451 /* Compute total number of coarse nodes and setup coarse solver */ 452 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 453 454 /* free */ 455 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "PCBDDCResetCustomization" 461 PetscErrorCode PCBDDCResetCustomization(PC pc) 462 { 463 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 464 PetscErrorCode ierr; 465 466 PetscFunctionBegin; 467 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 468 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 469 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 470 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 471 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 472 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 473 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 474 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 475 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 476 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 477 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNCT__ 482 #define __FUNCT__ "PCBDDCResetTopography" 483 PetscErrorCode PCBDDCResetTopography(PC pc) 484 { 485 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 490 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 491 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 492 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 493 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "PCBDDCResetSolvers" 499 PetscErrorCode PCBDDCResetSolvers(PC pc) 500 { 501 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 502 PetscScalar *array; 503 PetscErrorCode ierr; 504 505 PetscFunctionBegin; 506 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 507 if (pcbddc->coarse_phi_B) { 508 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 509 ierr = PetscFree(array);CHKERRQ(ierr); 510 } 511 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 512 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 513 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 514 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 515 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 516 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 517 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 518 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 519 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 520 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 521 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 522 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 523 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 524 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 525 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 526 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 527 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 528 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 529 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 530 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 531 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 532 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 533 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 534 ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr); 535 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 536 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 537 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 538 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 539 PetscFunctionReturn(0); 540 } 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 544 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 545 { 546 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 547 PC_IS *pcis = (PC_IS*)pc->data; 548 VecType impVecType; 549 PetscInt n_constraints,n_R,old_size,n_benign; 550 PetscErrorCode ierr; 551 552 PetscFunctionBegin; 553 if (!pcbddc->ConstraintMatrix) { 554 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 555 } 556 /* get sizes */ 557 n_benign = (PetscInt)(!!pcbddc->zerodiag); 558 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 559 if (n_benign && n_constraints) { 560 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis"); 561 } 562 n_R = pcis->n - pcbddc->n_vertices; 563 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 564 /* local work vectors (try to avoid unneeded work)*/ 565 /* R nodes */ 566 old_size = -1; 567 if (pcbddc->vec1_R) { 568 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 569 } 570 if (n_R != old_size) { 571 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 572 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 573 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 574 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 575 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 576 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 577 } 578 /* local primal dofs */ 579 old_size = -1; 580 if (pcbddc->vec1_P) { 581 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 582 } 583 if (pcbddc->local_primal_size != old_size) { 584 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 585 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 586 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 587 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 588 } 589 /* local explicit constraints */ 590 old_size = -1; 591 if (pcbddc->vec1_C) { 592 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 593 } 594 if (n_constraints && n_constraints != old_size) { 595 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 596 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 597 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 598 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 599 } 600 PetscFunctionReturn(0); 601 } 602 603 #undef __FUNCT__ 604 #define __FUNCT__ "PCBDDCSetUpCorrection" 605 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 606 { 607 PetscErrorCode ierr; 608 /* pointers to pcis and pcbddc */ 609 PC_IS* pcis = (PC_IS*)pc->data; 610 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 611 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 612 /* submatrices of local problem */ 613 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 614 /* submatrices of benign trick */ 615 Mat B0_R,B0_V; 616 /* submatrices of local coarse problem */ 617 Mat S_VV,S_CV,S_VC,S_CC; 618 /* working matrices */ 619 Mat C_CR; 620 /* additional working stuff */ 621 PC pc_R; 622 Mat F,B0 = NULL; 623 Vec vec_R_benign,vec_R_benign_2; 624 PetscBool isLU,isCHOL,isILU; 625 626 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 627 PetscScalar *work; 628 PetscInt *idx_V_B; 629 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 630 PetscInt i,n_R,n_D,n_B; 631 PetscBool unsymmetric_check; 632 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 633 MatType impMatType; 634 /* some shortcuts to scalars */ 635 PetscScalar one=1.0,m_one=-1.0; 636 637 PetscFunctionBegin; 638 n_benign = (PetscInt)(!!pcbddc->zerodiag); 639 n_vertices = pcbddc->n_vertices; 640 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 641 /* Set Non-overlapping dimensions */ 642 n_B = pcis->n_B; 643 n_D = pcis->n - n_B; 644 n_R = pcis->n - n_vertices; 645 646 /* Set types for local objects needed by BDDC precondtioner */ 647 impMatType = MATSEQDENSE; 648 649 /* vertices in boundary numbering */ 650 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 651 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 652 if (i != n_vertices) { 653 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 654 } 655 656 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 657 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 658 if (!n_benign) { 659 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 660 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 662 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 663 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 664 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 665 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 666 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 667 } else { /* these wrappers are not needed with the benign trick */ 668 S_VV = S_CV = S_VC = S_CC = NULL; 669 } 670 671 unsymmetric_check = PETSC_FALSE; 672 /* allocate workspace */ 673 n = 0; 674 if (n_constraints) { 675 n += n_R*n_constraints; 676 } 677 if (n_vertices) { 678 n = PetscMax(2*n_R*n_vertices,n); 679 n = PetscMax((n_R+n_B)*n_vertices,n); 680 } 681 if (!pcbddc->symmetric_primal) { 682 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 683 unsymmetric_check = PETSC_TRUE; 684 } 685 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 686 687 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 688 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 689 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 690 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 691 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 692 if (isLU || isILU || isCHOL) { 693 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 694 } else if (sub_schurs->reuse_mumps) { 695 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 696 MatFactorType type; 697 698 F = reuse_mumps->F; 699 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 700 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 701 } else { 702 F = NULL; 703 } 704 705 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 706 if (n_constraints) { 707 Mat M1,M2,M3; 708 Mat auxmat; 709 IS is_aux; 710 PetscScalar *array,*array2; 711 712 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 713 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 714 715 /* Extract constraints on R nodes: C_{CR} */ 716 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 717 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 718 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 719 720 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 721 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 722 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 723 for (i=0;i<n_constraints;i++) { 724 const PetscScalar *row_cmat_values; 725 const PetscInt *row_cmat_indices; 726 PetscInt size_of_constraint,j; 727 728 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 729 for (j=0;j<size_of_constraint;j++) { 730 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 731 } 732 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 733 } 734 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 735 if (F) { 736 Mat B; 737 738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 739 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 740 ierr = MatDestroy(&B);CHKERRQ(ierr); 741 } else { 742 PetscScalar *marr; 743 744 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 745 for (i=0;i<n_constraints;i++) { 746 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 747 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 748 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 749 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 750 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 751 } 752 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 753 } 754 if (!pcbddc->switch_static) { 755 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 756 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 757 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 758 for (i=0;i<n_constraints;i++) { 759 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 760 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 761 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 762 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 763 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 764 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 765 } 766 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 767 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 768 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 769 } else { 770 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 771 pcbddc->local_auxmat2 = local_auxmat2_R; 772 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 773 } 774 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 775 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 776 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 777 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 778 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 779 if (isCHOL) { 780 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 781 } else { 782 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 783 } 784 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 785 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 786 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 787 ierr = MatDestroy(&M2);CHKERRQ(ierr); 788 ierr = MatDestroy(&M3);CHKERRQ(ierr); 789 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 790 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 791 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 792 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 793 ierr = MatDestroy(&M1);CHKERRQ(ierr); 794 } 795 /* Get submatrices from subdomain matrix */ 796 if (n_vertices) { 797 IS is_aux; 798 799 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 800 IS tis; 801 802 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 803 ierr = ISSort(tis);CHKERRQ(ierr); 804 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 805 ierr = ISDestroy(&tis);CHKERRQ(ierr); 806 } else { 807 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 808 } 809 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 810 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 811 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 812 if (n_benign) { 813 IS dummy; 814 PetscInt ii[2]; 815 816 ii[0] = 0; 817 ii[1] = pcbddc->B0_ncol; 818 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 819 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 820 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 821 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 822 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 823 } 824 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 825 } 826 827 /* Matrix of coarse basis functions (local) */ 828 if (pcbddc->coarse_phi_B) { 829 PetscInt on_B,on_primal,on_D=n_D; 830 if (pcbddc->coarse_phi_D) { 831 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 832 } 833 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 834 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 835 PetscScalar *marray; 836 837 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 838 ierr = PetscFree(marray);CHKERRQ(ierr); 839 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 840 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 841 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 842 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 843 } 844 } 845 846 if (!pcbddc->coarse_phi_B) { 847 PetscScalar *marray; 848 849 n = n_B*pcbddc->local_primal_size; 850 if (pcbddc->switch_static || pcbddc->dbg_flag) { 851 n += n_D*pcbddc->local_primal_size; 852 } 853 if (!pcbddc->symmetric_primal) { 854 n *= 2; 855 } 856 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 857 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 858 n = n_B*pcbddc->local_primal_size; 859 if (pcbddc->switch_static || pcbddc->dbg_flag) { 860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 861 n += n_D*pcbddc->local_primal_size; 862 } 863 if (!pcbddc->symmetric_primal) { 864 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 865 if (pcbddc->switch_static || pcbddc->dbg_flag) { 866 n = n_B*pcbddc->local_primal_size; 867 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 868 } 869 } else { 870 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 871 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 872 if (pcbddc->switch_static || pcbddc->dbg_flag) { 873 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 874 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 875 } 876 } 877 } 878 879 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 880 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 881 const PetscInt *idxs; 882 883 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 884 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 885 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 886 } 887 /* vertices */ 888 if (n_vertices) { 889 890 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 891 892 if (n_R) { 893 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 894 Mat S_V_benign; 895 PetscBLASInt B_N,B_one = 1; 896 PetscScalar *x,*y; 897 PetscBool isseqaij; 898 899 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 900 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 901 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 902 if (F) { /* TODO could be optimized for symmetric problems */ 903 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 904 } else { 905 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 906 for (i=0;i<n_vertices;i++) { 907 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 908 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 909 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 910 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 911 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 912 } 913 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 914 } 915 if (n_benign) { 916 const PetscScalar *vals; 917 const PetscInt *idxs; 918 PetscInt n; 919 920 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr); 921 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign_2);CHKERRQ(ierr); 922 ierr = VecSet(vec_R_benign_2,0.);CHKERRQ(ierr); 923 ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 924 ierr = VecSetValues(vec_R_benign_2,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 925 ierr = VecAssemblyBegin(vec_R_benign_2);CHKERRQ(ierr); 926 ierr = VecAssemblyEnd(vec_R_benign_2);CHKERRQ(ierr); 927 ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 928 ierr = KSPSolve(pcbddc->ksp_R,vec_R_benign_2,vec_R_benign);CHKERRQ(ierr); 929 ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr); 930 } 931 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 932 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 933 if (n_constraints) { 934 Mat B; 935 936 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 937 for (i=0;i<n_vertices;i++) { 938 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 939 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 940 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 943 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 944 } 945 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 946 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 947 ierr = MatDestroy(&B);CHKERRQ(ierr); 948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 949 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 950 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 951 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 952 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 953 ierr = MatDestroy(&B);CHKERRQ(ierr); 954 } 955 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 956 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 957 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 958 } 959 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 960 if (n_benign) { 961 ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr); 962 } 963 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 964 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 965 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 966 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 967 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 968 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 969 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 970 if (!n_benign) { 971 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 972 } else { 973 const PetscScalar *vals; 974 PetscScalar val; 975 const PetscInt *idxs; 976 PetscInt n,j; 977 978 /* Use explicit loops to populate the local coarse matrix */ 979 ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr); 980 for (j=0;j<n_vertices;j++) { 981 for (i=0;i<n_vertices;i++) { 982 coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i]; 983 } 984 } 985 ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr); 986 ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr); 987 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 988 for (j=0;j<n_vertices;j++) { 989 coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j]; 990 coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j]; 991 } 992 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 993 ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr); 994 ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr); 995 ierr = VecDot(vec_R_benign_2,vec_R_benign,&val);CHKERRQ(ierr); 996 coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = val; 997 } 998 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 999 } else { 1000 /* TODO benign case also here */ 1001 if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented"); 1002 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1003 } 1004 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1005 /* coarse basis functions */ 1006 for (i=0;i<n_vertices;i++) { 1007 PetscScalar *y; 1008 1009 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1010 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1011 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1012 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1013 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 y[n_B*i+idx_V_B[i]] = 1.0; 1015 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1016 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1017 1018 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1019 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1020 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1021 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1022 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1023 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1024 y[n_D*i+p0_lidx_I] = 0.0; 1025 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1026 } 1027 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1028 } 1029 if (n_benign) { 1030 PetscScalar *y; 1031 1032 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1033 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr); 1034 ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1037 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1038 1039 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1040 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1041 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*n_vertices);CHKERRQ(ierr); 1042 ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043 ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1044 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1045 y[n_D*n_vertices+p0_lidx_I] = 0.0; 1046 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1047 } 1048 } 1049 /* if n_R == 0 the object is not destroyed */ 1050 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1051 } 1052 1053 if (n_constraints) { 1054 Mat B; 1055 1056 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1057 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1058 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1059 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1060 if (n_vertices) { 1061 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1062 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1063 } else { 1064 Mat S_VCt; 1065 1066 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1067 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1068 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1069 } 1070 } 1071 ierr = MatDestroy(&B);CHKERRQ(ierr); 1072 /* coarse basis functions */ 1073 for (i=0;i<n_constraints;i++) { 1074 PetscScalar *y; 1075 1076 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1077 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1078 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1079 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1080 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1082 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1083 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1084 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1085 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1086 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1089 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1090 } 1091 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1092 } 1093 } 1094 if (n_constraints) { 1095 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1096 } 1097 1098 if (n_benign) { 1099 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1100 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1101 ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr); 1102 } 1103 1104 /* compute other basis functions for non-symmetric problems */ 1105 if (!pcbddc->symmetric_primal) { 1106 1107 if (n_constraints) { 1108 Mat S_CCT,B_C; 1109 1110 /* this is a lazy thing */ 1111 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1113 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1114 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1115 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1116 if (n_vertices) { 1117 Mat B_V,S_VCT; 1118 1119 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1120 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1121 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1122 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1123 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1124 } 1125 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1126 } else { /* if there are no constraints, reset work */ 1127 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1128 } 1129 if (n_vertices && n_R) { 1130 Mat A_VRT; 1131 PetscScalar *marray; 1132 PetscBLASInt B_N,B_one = 1; 1133 1134 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1135 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1136 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1137 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1138 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1139 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1140 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1141 } 1142 1143 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1144 for (i=0;i<pcbddc->local_primal_size;i++) { 1145 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1146 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1147 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1148 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1149 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1150 } 1151 } else { 1152 for (i=0;i<pcbddc->local_primal_size;i++) { 1153 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1154 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1155 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1156 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1157 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1158 } 1159 } 1160 /* coarse basis functions */ 1161 for (i=0;i<pcbddc->local_primal_size;i++) { 1162 PetscScalar *y; 1163 1164 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1165 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1166 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1167 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1168 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 if (i<n_vertices) { 1170 y[n_B*i+idx_V_B[i]] = 1.0; 1171 } 1172 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1173 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1174 1175 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1176 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1177 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1178 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1179 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1180 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1181 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1182 } 1183 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1184 } 1185 } 1186 /* free memory */ 1187 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1188 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1189 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1190 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1191 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1192 ierr = PetscFree(work);CHKERRQ(ierr); 1193 if (n_vertices) { 1194 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1195 } 1196 if (n_constraints) { 1197 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1198 } 1199 /* Checking coarse_sub_mat and coarse basis functios */ 1200 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1201 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1202 if (pcbddc->dbg_flag) { 1203 Mat coarse_sub_mat; 1204 Mat AUXMAT,TM1,TM2,TM3,TM4; 1205 Mat coarse_phi_D,coarse_phi_B; 1206 Mat coarse_psi_D,coarse_psi_B; 1207 Mat A_II,A_BB,A_IB,A_BI; 1208 Mat C_B,CPHI; 1209 IS is_dummy; 1210 Vec mones; 1211 MatType checkmattype=MATSEQAIJ; 1212 PetscReal real_value; 1213 1214 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1215 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1216 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1217 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1218 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1219 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1220 if (unsymmetric_check) { 1221 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1222 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1223 } 1224 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1225 1226 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1227 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1228 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1229 if (unsymmetric_check) { 1230 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1231 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1232 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1233 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1234 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1235 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1236 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1237 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1238 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1239 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1240 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1241 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1242 } else { 1243 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1244 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1245 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1246 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1247 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1248 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1249 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1250 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1251 } 1252 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1253 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1254 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1255 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1256 if (n_benign) { 1257 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1258 PetscScalar *data,*data2; 1259 1260 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1261 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1262 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1263 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1264 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1265 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1266 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1267 for (i=0;i<pcbddc->local_primal_size;i++) { 1268 data[n_vertices*pcbddc->local_primal_size+i] += data2[i]; 1269 data[i*pcbddc->local_primal_size+n_vertices] += data2[i]; 1270 } 1271 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1272 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1273 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1274 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1275 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1276 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1277 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1278 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1279 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1280 } 1281 #if 0 1282 { 1283 PetscViewer viewer; 1284 char filename[256]; 1285 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1286 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1287 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1288 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1289 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1290 } 1291 #endif 1292 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1293 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1294 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1295 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1296 1297 /* check constraints */ 1298 if (!n_benign) { 1299 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1300 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1301 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1302 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1303 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1304 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1305 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1306 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1307 if (unsymmetric_check) { 1308 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1309 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1310 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1311 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1312 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1313 } 1314 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1315 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1316 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1317 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1318 } 1319 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1320 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1321 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1322 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1323 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1324 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1325 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1326 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1327 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1328 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1329 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1330 if (unsymmetric_check) { 1331 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1332 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1333 } 1334 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1335 } 1336 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1337 /* get back data */ 1338 *coarse_submat_vals_n = coarse_submat_vals; 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1344 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1345 { 1346 Mat *work_mat; 1347 IS isrow_s,iscol_s; 1348 PetscBool rsorted,csorted; 1349 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1350 PetscErrorCode ierr; 1351 1352 PetscFunctionBegin; 1353 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1354 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1355 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1356 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1357 1358 if (!rsorted) { 1359 const PetscInt *idxs; 1360 PetscInt *idxs_sorted,i; 1361 1362 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1363 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1364 for (i=0;i<rsize;i++) { 1365 idxs_perm_r[i] = i; 1366 } 1367 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1368 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1369 for (i=0;i<rsize;i++) { 1370 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1371 } 1372 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1373 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1374 } else { 1375 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1376 isrow_s = isrow; 1377 } 1378 1379 if (!csorted) { 1380 if (isrow == iscol) { 1381 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1382 iscol_s = isrow_s; 1383 } else { 1384 const PetscInt *idxs; 1385 PetscInt *idxs_sorted,i; 1386 1387 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1388 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1389 for (i=0;i<csize;i++) { 1390 idxs_perm_c[i] = i; 1391 } 1392 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1393 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1394 for (i=0;i<csize;i++) { 1395 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1396 } 1397 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1398 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1399 } 1400 } else { 1401 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1402 iscol_s = iscol; 1403 } 1404 1405 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1406 1407 if (!rsorted || !csorted) { 1408 Mat new_mat; 1409 IS is_perm_r,is_perm_c; 1410 1411 if (!rsorted) { 1412 PetscInt *idxs_r,i; 1413 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1414 for (i=0;i<rsize;i++) { 1415 idxs_r[idxs_perm_r[i]] = i; 1416 } 1417 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1418 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1419 } else { 1420 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1421 } 1422 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1423 1424 if (!csorted) { 1425 if (isrow_s == iscol_s) { 1426 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1427 is_perm_c = is_perm_r; 1428 } else { 1429 PetscInt *idxs_c,i; 1430 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1431 for (i=0;i<csize;i++) { 1432 idxs_c[idxs_perm_c[i]] = i; 1433 } 1434 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1435 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1436 } 1437 } else { 1438 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1439 } 1440 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1441 1442 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1443 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1444 work_mat[0] = new_mat; 1445 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1446 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1447 } 1448 1449 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1450 *B = work_mat[0]; 1451 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1452 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1453 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1459 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1460 { 1461 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1462 PC_IS* pcis = (PC_IS*)pc->data; 1463 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1464 Mat new_mat; 1465 IS is_local,is_global; 1466 PetscInt local_size; 1467 PetscBool isseqaij; 1468 PetscErrorCode ierr; 1469 1470 PetscFunctionBegin; 1471 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1472 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1473 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1474 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1475 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1476 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1477 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1478 1479 /* check */ 1480 if (pcbddc->dbg_flag) { 1481 Vec x,x_change; 1482 PetscReal error; 1483 1484 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1485 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1486 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1487 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1488 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1490 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1491 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1492 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1493 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1494 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1495 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1496 ierr = VecDestroy(&x);CHKERRQ(ierr); 1497 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1498 } 1499 1500 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1501 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1502 if (isseqaij) { 1503 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1504 } else { 1505 Mat work_mat; 1506 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1507 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1508 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1509 } 1510 if (matis->A->symmetric_set) { 1511 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1512 #if !defined(PETSC_USE_COMPLEX) 1513 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1514 #endif 1515 } 1516 /* 1517 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1518 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1519 */ 1520 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1526 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1527 { 1528 PC_IS* pcis = (PC_IS*)(pc->data); 1529 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1530 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1531 PetscInt *idx_R_local=NULL; 1532 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1533 PetscInt vbs,bs; 1534 PetscBT bitmask=NULL; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 /* 1539 No need to setup local scatters if 1540 - primal space is unchanged 1541 AND 1542 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1543 AND 1544 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1545 */ 1546 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1547 PetscFunctionReturn(0); 1548 } 1549 /* destroy old objects */ 1550 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1551 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1552 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1553 /* Set Non-overlapping dimensions */ 1554 n_B = pcis->n_B; 1555 n_D = pcis->n - n_B; 1556 n_vertices = pcbddc->n_vertices; 1557 1558 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1559 1560 /* create auxiliary bitmask and allocate workspace */ 1561 if (!sub_schurs->reuse_mumps) { 1562 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1563 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1564 for (i=0;i<n_vertices;i++) { 1565 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1566 } 1567 1568 for (i=0, n_R=0; i<pcis->n; i++) { 1569 if (!PetscBTLookup(bitmask,i)) { 1570 idx_R_local[n_R++] = i; 1571 } 1572 } 1573 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1574 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1575 1576 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1577 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1578 } 1579 1580 /* Block code */ 1581 vbs = 1; 1582 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1583 if (bs>1 && !(n_vertices%bs)) { 1584 PetscBool is_blocked = PETSC_TRUE; 1585 PetscInt *vary; 1586 if (!sub_schurs->reuse_mumps) { 1587 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1588 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1589 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1590 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1591 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1592 for (i=0; i<pcis->n/bs; i++) { 1593 if (vary[i]!=0 && vary[i]!=bs) { 1594 is_blocked = PETSC_FALSE; 1595 break; 1596 } 1597 } 1598 ierr = PetscFree(vary);CHKERRQ(ierr); 1599 } else { 1600 /* Verify directly the R set */ 1601 for (i=0; i<n_R/bs; i++) { 1602 PetscInt j,node=idx_R_local[bs*i]; 1603 for (j=1; j<bs; j++) { 1604 if (node != idx_R_local[bs*i+j]-j) { 1605 is_blocked = PETSC_FALSE; 1606 break; 1607 } 1608 } 1609 } 1610 } 1611 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1612 vbs = bs; 1613 for (i=0;i<n_R/vbs;i++) { 1614 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1615 } 1616 } 1617 } 1618 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1619 if (sub_schurs->reuse_mumps) { 1620 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1621 1622 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1623 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1624 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1625 reuse_mumps->is_R = pcbddc->is_R_local; 1626 } else { 1627 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1628 } 1629 1630 /* print some info if requested */ 1631 if (pcbddc->dbg_flag) { 1632 PetscInt benign; 1633 1634 benign = (PetscInt)(!!pcbddc->zerodiag); 1635 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1636 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1637 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1638 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1639 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1641 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1642 } 1643 1644 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1645 if (!sub_schurs->reuse_mumps) { 1646 IS is_aux1,is_aux2; 1647 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1648 1649 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1650 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1651 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1652 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1653 for (i=0; i<n_D; i++) { 1654 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1655 } 1656 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1657 for (i=0, j=0; i<n_R; i++) { 1658 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1659 aux_array1[j++] = i; 1660 } 1661 } 1662 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1663 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1664 for (i=0, j=0; i<n_B; i++) { 1665 if (!PetscBTLookup(bitmask,is_indices[i])) { 1666 aux_array2[j++] = i; 1667 } 1668 } 1669 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1670 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1671 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1672 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1673 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1674 1675 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1676 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1677 for (i=0, j=0; i<n_R; i++) { 1678 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1679 aux_array1[j++] = i; 1680 } 1681 } 1682 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1683 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1684 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1685 } 1686 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1687 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1688 } else { 1689 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1690 IS tis; 1691 PetscInt schur_size; 1692 1693 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1694 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1695 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1696 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1697 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1698 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1699 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1700 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1701 } 1702 } 1703 PetscFunctionReturn(0); 1704 } 1705 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1709 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1710 { 1711 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1712 PC_IS *pcis = (PC_IS*)pc->data; 1713 PC pc_temp; 1714 Mat A_RR; 1715 MatReuse reuse; 1716 PetscScalar m_one = -1.0; 1717 PetscReal value; 1718 PetscInt n_D,n_R; 1719 PetscBool use_exact,use_exact_reduced,issbaij; 1720 PetscErrorCode ierr; 1721 /* prefixes stuff */ 1722 char dir_prefix[256],neu_prefix[256],str_level[16]; 1723 size_t len; 1724 1725 PetscFunctionBegin; 1726 1727 /* compute prefixes */ 1728 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1729 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1730 if (!pcbddc->current_level) { 1731 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1732 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1733 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1734 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1735 } else { 1736 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1737 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1738 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1739 len -= 15; /* remove "pc_bddc_coarse_" */ 1740 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1741 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1742 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1743 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1744 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1745 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1746 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1747 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1748 } 1749 1750 /* DIRICHLET PROBLEM */ 1751 if (dirichlet) { 1752 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1753 if (pcbddc->local_mat->symmetric_set) { 1754 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1755 } 1756 /* Matrix for Dirichlet problem is pcis->A_II */ 1757 n_D = pcis->n - pcis->n_B; 1758 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1759 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1760 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1761 /* default */ 1762 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1763 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1764 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1765 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1766 if (issbaij) { 1767 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1768 } else { 1769 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1770 } 1771 /* Allow user's customization */ 1772 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1773 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1774 } 1775 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1776 if (sub_schurs->reuse_mumps) { 1777 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1778 1779 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1780 } 1781 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1782 if (!n_D) { 1783 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1784 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1785 } 1786 /* Set Up KSP for Dirichlet problem of BDDC */ 1787 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1788 /* set ksp_D into pcis data */ 1789 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1790 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1791 pcis->ksp_D = pcbddc->ksp_D; 1792 } 1793 1794 /* NEUMANN PROBLEM */ 1795 A_RR = 0; 1796 if (neumann) { 1797 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1798 PetscInt ibs,mbs; 1799 PetscBool issbaij; 1800 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1801 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1802 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1803 if (pcbddc->ksp_R) { /* already created ksp */ 1804 PetscInt nn_R; 1805 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1806 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1807 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1808 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1809 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1810 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1811 reuse = MAT_INITIAL_MATRIX; 1812 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1813 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1814 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1815 reuse = MAT_INITIAL_MATRIX; 1816 } else { /* safe to reuse the matrix */ 1817 reuse = MAT_REUSE_MATRIX; 1818 } 1819 } 1820 /* last check */ 1821 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1822 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1823 reuse = MAT_INITIAL_MATRIX; 1824 } 1825 } else { /* first time, so we need to create the matrix */ 1826 reuse = MAT_INITIAL_MATRIX; 1827 } 1828 /* extract A_RR */ 1829 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1830 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1831 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1832 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1833 if (matis->A == pcbddc->local_mat) { 1834 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1835 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1836 } else { 1837 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1838 } 1839 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1840 if (matis->A == pcbddc->local_mat) { 1841 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1842 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1843 } else { 1844 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1845 } 1846 } 1847 if (!sub_schurs->reuse_mumps) { 1848 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1849 if (pcbddc->local_mat->symmetric_set) { 1850 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1851 } 1852 } else { 1853 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1854 1855 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1856 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1857 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1858 } 1859 if (!pcbddc->ksp_R) { /* create object if not present */ 1860 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1861 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1862 /* default */ 1863 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1864 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1865 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1866 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1867 if (issbaij) { 1868 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1869 } else { 1870 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1871 } 1872 /* Allow user's customization */ 1873 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1874 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1875 } 1876 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1877 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1878 if (!n_R) { 1879 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1880 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1881 } 1882 /* Reuse MUMPS solver if it is present */ 1883 if (sub_schurs->reuse_mumps) { 1884 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1885 1886 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1887 } 1888 /* Set Up KSP for Neumann problem of BDDC */ 1889 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1890 } 1891 /* free Neumann problem's matrix */ 1892 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1893 1894 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1895 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1896 if (pcbddc->dbg_flag) { 1897 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1898 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1899 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1900 } 1901 if (dirichlet) { /* Dirichlet */ 1902 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1903 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1904 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1905 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1906 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1907 /* need to be adapted? */ 1908 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1909 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1910 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1911 /* print info */ 1912 if (pcbddc->dbg_flag) { 1913 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1914 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1915 } 1916 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1917 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1918 } 1919 } 1920 if (neumann) { /* Neumann */ 1921 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1922 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1923 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1924 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1925 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1926 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1927 /* need to be adapted? */ 1928 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1929 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1930 /* print info */ 1931 if (pcbddc->dbg_flag) { 1932 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1933 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1934 } 1935 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1936 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1937 } 1938 } 1939 } 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1945 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1946 { 1947 PetscErrorCode ierr; 1948 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1949 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1950 1951 PetscFunctionBegin; 1952 if (!sub_schurs->reuse_mumps) { 1953 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1954 } 1955 if (!pcbddc->switch_static) { 1956 if (applytranspose && pcbddc->local_auxmat1) { 1957 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1958 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1959 } 1960 if (!sub_schurs->reuse_mumps) { 1961 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1962 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1963 } else { 1964 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1965 1966 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1967 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1968 } 1969 } else { 1970 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1971 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1972 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1973 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1974 if (applytranspose && pcbddc->local_auxmat1) { 1975 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1976 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1977 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1978 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1979 } 1980 } 1981 if (!sub_schurs->reuse_mumps) { 1982 if (applytranspose) { 1983 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1984 } else { 1985 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1986 } 1987 #if defined(PETSC_HAVE_MUMPS) 1988 } else { 1989 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1990 1991 if (applytranspose) { 1992 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1993 } else { 1994 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1995 } 1996 #endif 1997 } 1998 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1999 if (!pcbddc->switch_static) { 2000 if (!sub_schurs->reuse_mumps) { 2001 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2002 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 } else { 2004 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2005 2006 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2007 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2008 } 2009 if (!applytranspose && pcbddc->local_auxmat1) { 2010 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2011 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2012 } 2013 } else { 2014 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2015 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2016 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2017 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2018 if (!applytranspose && pcbddc->local_auxmat1) { 2019 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2020 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2021 } 2022 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2023 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2024 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2026 } 2027 PetscFunctionReturn(0); 2028 } 2029 2030 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2033 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2034 { 2035 PetscErrorCode ierr; 2036 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2037 PC_IS* pcis = (PC_IS*) (pc->data); 2038 const PetscScalar zero = 0.0; 2039 2040 PetscFunctionBegin; 2041 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2042 if (applytranspose) { 2043 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2044 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2045 } else { 2046 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2047 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2048 } 2049 2050 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2051 if (pcbddc->benign_change) { 2052 PetscScalar *array; 2053 2054 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2055 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2056 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2057 } 2058 2059 /* start communications from local primal nodes to rhs of coarse solver */ 2060 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2061 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2062 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2063 2064 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2065 /* TODO remove null space when doing multilevel */ 2066 if (pcbddc->coarse_ksp) { 2067 Vec rhs,sol; 2068 2069 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2070 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2071 if (applytranspose) { 2072 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2073 } else { 2074 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2075 } 2076 } 2077 2078 /* Local solution on R nodes */ 2079 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2080 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2081 } 2082 2083 /* communications from coarse sol to local primal nodes */ 2084 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2085 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2086 2087 /* Sum contributions from two levels */ 2088 if (applytranspose) { 2089 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2090 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2091 } else { 2092 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2093 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2094 } 2095 /* store p0 */ 2096 if (pcbddc->benign_change) { 2097 PetscScalar *array; 2098 2099 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2100 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2101 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2102 } 2103 PetscFunctionReturn(0); 2104 } 2105 2106 #undef __FUNCT__ 2107 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2108 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2109 { 2110 PetscErrorCode ierr; 2111 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2112 PetscScalar *array; 2113 Vec from,to; 2114 2115 PetscFunctionBegin; 2116 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2117 from = pcbddc->coarse_vec; 2118 to = pcbddc->vec1_P; 2119 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2120 Vec tvec; 2121 2122 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2123 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2124 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2125 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2126 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2127 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2128 } 2129 } else { /* from local to global -> put data in coarse right hand side */ 2130 from = pcbddc->vec1_P; 2131 to = pcbddc->coarse_vec; 2132 } 2133 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2139 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2140 { 2141 PetscErrorCode ierr; 2142 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2143 PetscScalar *array; 2144 Vec from,to; 2145 2146 PetscFunctionBegin; 2147 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2148 from = pcbddc->coarse_vec; 2149 to = pcbddc->vec1_P; 2150 } else { /* from local to global -> put data in coarse right hand side */ 2151 from = pcbddc->vec1_P; 2152 to = pcbddc->coarse_vec; 2153 } 2154 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2155 if (smode == SCATTER_FORWARD) { 2156 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2157 Vec tvec; 2158 2159 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2160 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2161 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2162 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2163 } 2164 } else { 2165 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2166 ierr = VecResetArray(from);CHKERRQ(ierr); 2167 } 2168 } 2169 PetscFunctionReturn(0); 2170 } 2171 2172 /* uncomment for testing purposes */ 2173 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2176 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2177 { 2178 PetscErrorCode ierr; 2179 PC_IS* pcis = (PC_IS*)(pc->data); 2180 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2181 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2182 /* one and zero */ 2183 PetscScalar one=1.0,zero=0.0; 2184 /* space to store constraints and their local indices */ 2185 PetscScalar *constraints_data; 2186 PetscInt *constraints_idxs,*constraints_idxs_B; 2187 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2188 PetscInt *constraints_n; 2189 /* iterators */ 2190 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2191 /* BLAS integers */ 2192 PetscBLASInt lwork,lierr; 2193 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2194 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2195 /* reuse */ 2196 PetscInt olocal_primal_size,olocal_primal_size_cc; 2197 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2198 /* change of basis */ 2199 PetscBool qr_needed; 2200 PetscBT change_basis,qr_needed_idx; 2201 /* auxiliary stuff */ 2202 PetscInt *nnz,*is_indices; 2203 PetscInt ncc; 2204 /* some quantities */ 2205 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2206 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2207 2208 PetscFunctionBegin; 2209 /* Destroy Mat objects computed previously */ 2210 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2211 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2212 /* save info on constraints from previous setup (if any) */ 2213 olocal_primal_size = pcbddc->local_primal_size; 2214 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2215 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2216 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2217 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2218 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2219 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2220 2221 /* print some info */ 2222 if (pcbddc->dbg_flag) { 2223 IS vertices; 2224 PetscInt nv,nedges,nfaces; 2225 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2226 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2227 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2228 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2229 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2230 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2231 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2232 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2233 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2234 } 2235 2236 if (!pcbddc->adaptive_selection) { 2237 IS ISForVertices,*ISForFaces,*ISForEdges; 2238 MatNullSpace nearnullsp; 2239 const Vec *nearnullvecs; 2240 Vec *localnearnullsp; 2241 PetscScalar *array; 2242 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2243 PetscBool nnsp_has_cnst; 2244 /* LAPACK working arrays for SVD or POD */ 2245 PetscBool skip_lapack,boolforchange; 2246 PetscScalar *work; 2247 PetscReal *singular_vals; 2248 #if defined(PETSC_USE_COMPLEX) 2249 PetscReal *rwork; 2250 #endif 2251 #if defined(PETSC_MISSING_LAPACK_GESVD) 2252 PetscScalar *temp_basis,*correlation_mat; 2253 #else 2254 PetscBLASInt dummy_int=1; 2255 PetscScalar dummy_scalar=1.; 2256 #endif 2257 2258 /* Get index sets for faces, edges and vertices from graph */ 2259 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2260 /* free unneeded index sets */ 2261 if (!pcbddc->use_vertices) { 2262 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2263 } 2264 if (!pcbddc->use_edges) { 2265 for (i=0;i<n_ISForEdges;i++) { 2266 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2267 } 2268 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2269 n_ISForEdges = 0; 2270 } 2271 if (!pcbddc->use_faces) { 2272 for (i=0;i<n_ISForFaces;i++) { 2273 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2274 } 2275 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2276 n_ISForFaces = 0; 2277 } 2278 2279 #if defined(PETSC_USE_DEBUG) 2280 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2281 Also use_change_of_basis should be consistent among processors */ 2282 if (pcbddc->NullSpace) { 2283 PetscBool tbool[2],gbool[2]; 2284 2285 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2286 pcbddc->use_change_of_basis = PETSC_TRUE; 2287 if (!ISForEdges) { 2288 pcbddc->use_change_on_faces = PETSC_TRUE; 2289 } 2290 } 2291 tbool[0] = pcbddc->use_change_of_basis; 2292 tbool[1] = pcbddc->use_change_on_faces; 2293 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2294 pcbddc->use_change_of_basis = gbool[0]; 2295 pcbddc->use_change_on_faces = gbool[1]; 2296 } 2297 #endif 2298 2299 /* check if near null space is attached to global mat */ 2300 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2301 if (nearnullsp) { 2302 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2303 /* remove any stored info */ 2304 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2305 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2306 /* store information for BDDC solver reuse */ 2307 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2308 pcbddc->onearnullspace = nearnullsp; 2309 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2310 for (i=0;i<nnsp_size;i++) { 2311 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2312 } 2313 } else { /* if near null space is not provided BDDC uses constants by default */ 2314 nnsp_size = 0; 2315 nnsp_has_cnst = PETSC_TRUE; 2316 } 2317 /* get max number of constraints on a single cc */ 2318 max_constraints = nnsp_size; 2319 if (nnsp_has_cnst) max_constraints++; 2320 2321 /* 2322 Evaluate maximum storage size needed by the procedure 2323 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2324 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2325 There can be multiple constraints per connected component 2326 */ 2327 n_vertices = 0; 2328 if (ISForVertices) { 2329 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2330 } 2331 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2332 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2333 2334 total_counts = n_ISForFaces+n_ISForEdges; 2335 total_counts *= max_constraints; 2336 total_counts += n_vertices; 2337 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2338 2339 total_counts = 0; 2340 max_size_of_constraint = 0; 2341 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2342 IS used_is; 2343 if (i<n_ISForEdges) { 2344 used_is = ISForEdges[i]; 2345 } else { 2346 used_is = ISForFaces[i-n_ISForEdges]; 2347 } 2348 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2349 total_counts += j; 2350 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2351 } 2352 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2353 2354 /* get local part of global near null space vectors */ 2355 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2356 for (k=0;k<nnsp_size;k++) { 2357 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2358 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2359 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2360 } 2361 2362 /* whether or not to skip lapack calls */ 2363 skip_lapack = PETSC_TRUE; 2364 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2365 2366 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2367 if (!skip_lapack) { 2368 PetscScalar temp_work; 2369 2370 #if defined(PETSC_MISSING_LAPACK_GESVD) 2371 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2372 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2373 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2374 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2375 #if defined(PETSC_USE_COMPLEX) 2376 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2377 #endif 2378 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2379 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2380 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2381 lwork = -1; 2382 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2383 #if !defined(PETSC_USE_COMPLEX) 2384 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2385 #else 2386 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2387 #endif 2388 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2389 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2390 #else /* on missing GESVD */ 2391 /* SVD */ 2392 PetscInt max_n,min_n; 2393 max_n = max_size_of_constraint; 2394 min_n = max_constraints; 2395 if (max_size_of_constraint < max_constraints) { 2396 min_n = max_size_of_constraint; 2397 max_n = max_constraints; 2398 } 2399 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2400 #if defined(PETSC_USE_COMPLEX) 2401 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2402 #endif 2403 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2404 lwork = -1; 2405 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2406 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2407 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2408 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2409 #if !defined(PETSC_USE_COMPLEX) 2410 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2411 #else 2412 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2413 #endif 2414 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2415 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2416 #endif /* on missing GESVD */ 2417 /* Allocate optimal workspace */ 2418 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2419 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2420 } 2421 /* Now we can loop on constraining sets */ 2422 total_counts = 0; 2423 constraints_idxs_ptr[0] = 0; 2424 constraints_data_ptr[0] = 0; 2425 /* vertices */ 2426 if (n_vertices) { 2427 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2428 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2429 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2430 for (i=0;i<n_vertices;i++) { 2431 constraints_n[total_counts] = 1; 2432 constraints_data[total_counts] = 1.0; 2433 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2434 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2435 total_counts++; 2436 } 2437 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2438 PetscBool used_vertex; 2439 for (i=0;i<n_vertices;i++) { 2440 used_vertex = PETSC_FALSE; 2441 k = 0; 2442 while (!used_vertex && k<nnsp_size) { 2443 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2444 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2445 constraints_n[total_counts] = 1; 2446 constraints_idxs[total_counts] = is_indices[i]; 2447 constraints_data[total_counts] = 1.0; 2448 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2449 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2450 total_counts++; 2451 used_vertex = PETSC_TRUE; 2452 } 2453 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2454 k++; 2455 } 2456 } 2457 } 2458 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2459 n_vertices = total_counts; 2460 } 2461 2462 /* edges and faces */ 2463 total_counts_cc = total_counts; 2464 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2465 IS used_is; 2466 PetscBool idxs_copied = PETSC_FALSE; 2467 2468 if (ncc<n_ISForEdges) { 2469 used_is = ISForEdges[ncc]; 2470 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2471 } else { 2472 used_is = ISForFaces[ncc-n_ISForEdges]; 2473 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2474 } 2475 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2476 2477 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2478 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2479 /* change of basis should not be performed on local periodic nodes */ 2480 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2481 if (nnsp_has_cnst) { 2482 PetscScalar quad_value; 2483 2484 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2485 idxs_copied = PETSC_TRUE; 2486 2487 if (!pcbddc->use_nnsp_true) { 2488 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2489 } else { 2490 quad_value = 1.0; 2491 } 2492 for (j=0;j<size_of_constraint;j++) { 2493 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2494 } 2495 temp_constraints++; 2496 total_counts++; 2497 } 2498 for (k=0;k<nnsp_size;k++) { 2499 PetscReal real_value; 2500 PetscScalar *ptr_to_data; 2501 2502 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2503 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2504 for (j=0;j<size_of_constraint;j++) { 2505 ptr_to_data[j] = array[is_indices[j]]; 2506 } 2507 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2508 /* check if array is null on the connected component */ 2509 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2510 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2511 if (real_value > 0.0) { /* keep indices and values */ 2512 temp_constraints++; 2513 total_counts++; 2514 if (!idxs_copied) { 2515 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2516 idxs_copied = PETSC_TRUE; 2517 } 2518 } 2519 } 2520 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2521 valid_constraints = temp_constraints; 2522 if (!pcbddc->use_nnsp_true && temp_constraints) { 2523 if (temp_constraints == 1) { /* just normalize the constraint */ 2524 PetscScalar norm,*ptr_to_data; 2525 2526 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2527 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2528 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2529 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2530 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2531 } else { /* perform SVD */ 2532 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2533 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2534 2535 #if defined(PETSC_MISSING_LAPACK_GESVD) 2536 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2537 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2538 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2539 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2540 from that computed using LAPACKgesvd 2541 -> This is due to a different computation of eigenvectors in LAPACKheev 2542 -> The quality of the POD-computed basis will be the same */ 2543 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2544 /* Store upper triangular part of correlation matrix */ 2545 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2546 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2547 for (j=0;j<temp_constraints;j++) { 2548 for (k=0;k<j+1;k++) { 2549 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2550 } 2551 } 2552 /* compute eigenvalues and eigenvectors of correlation matrix */ 2553 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2554 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2555 #if !defined(PETSC_USE_COMPLEX) 2556 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2557 #else 2558 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2559 #endif 2560 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2561 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2562 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2563 j = 0; 2564 while (j < temp_constraints && singular_vals[j] < tol) j++; 2565 total_counts = total_counts-j; 2566 valid_constraints = temp_constraints-j; 2567 /* scale and copy POD basis into used quadrature memory */ 2568 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2569 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2570 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2571 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2572 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2573 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2574 if (j<temp_constraints) { 2575 PetscInt ii; 2576 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2577 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2578 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2579 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2580 for (k=0;k<temp_constraints-j;k++) { 2581 for (ii=0;ii<size_of_constraint;ii++) { 2582 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2583 } 2584 } 2585 } 2586 #else /* on missing GESVD */ 2587 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2588 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2589 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2590 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2591 #if !defined(PETSC_USE_COMPLEX) 2592 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2593 #else 2594 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2595 #endif 2596 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2597 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2598 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2599 k = temp_constraints; 2600 if (k > size_of_constraint) k = size_of_constraint; 2601 j = 0; 2602 while (j < k && singular_vals[k-j-1] < tol) j++; 2603 valid_constraints = k-j; 2604 total_counts = total_counts-temp_constraints+valid_constraints; 2605 #endif /* on missing GESVD */ 2606 } 2607 } 2608 /* update pointers information */ 2609 if (valid_constraints) { 2610 constraints_n[total_counts_cc] = valid_constraints; 2611 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2612 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2613 /* set change_of_basis flag */ 2614 if (boolforchange) { 2615 PetscBTSet(change_basis,total_counts_cc); 2616 } 2617 total_counts_cc++; 2618 } 2619 } 2620 /* free workspace */ 2621 if (!skip_lapack) { 2622 ierr = PetscFree(work);CHKERRQ(ierr); 2623 #if defined(PETSC_USE_COMPLEX) 2624 ierr = PetscFree(rwork);CHKERRQ(ierr); 2625 #endif 2626 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2627 #if defined(PETSC_MISSING_LAPACK_GESVD) 2628 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2629 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2630 #endif 2631 } 2632 for (k=0;k<nnsp_size;k++) { 2633 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2634 } 2635 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2636 /* free index sets of faces, edges and vertices */ 2637 for (i=0;i<n_ISForFaces;i++) { 2638 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2639 } 2640 if (n_ISForFaces) { 2641 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2642 } 2643 for (i=0;i<n_ISForEdges;i++) { 2644 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2645 } 2646 if (n_ISForEdges) { 2647 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2648 } 2649 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2650 } else { 2651 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2652 2653 total_counts = 0; 2654 n_vertices = 0; 2655 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2656 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2657 } 2658 max_constraints = 0; 2659 total_counts_cc = 0; 2660 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2661 total_counts += pcbddc->adaptive_constraints_n[i]; 2662 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2663 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2664 } 2665 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2666 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2667 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2668 constraints_data = pcbddc->adaptive_constraints_data; 2669 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2670 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2671 total_counts_cc = 0; 2672 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2673 if (pcbddc->adaptive_constraints_n[i]) { 2674 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2675 } 2676 } 2677 #if 0 2678 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2679 for (i=0;i<total_counts_cc;i++) { 2680 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2681 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2682 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2683 printf(" %d",constraints_idxs[j]); 2684 } 2685 printf("\n"); 2686 printf("number of cc: %d\n",constraints_n[i]); 2687 } 2688 for (i=0;i<n_vertices;i++) { 2689 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2690 } 2691 for (i=0;i<sub_schurs->n_subs;i++) { 2692 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2693 } 2694 #endif 2695 2696 max_size_of_constraint = 0; 2697 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2698 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2699 /* Change of basis */ 2700 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2701 if (pcbddc->use_change_of_basis) { 2702 for (i=0;i<sub_schurs->n_subs;i++) { 2703 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2704 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2705 } 2706 } 2707 } 2708 } 2709 pcbddc->local_primal_size = total_counts; 2710 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2711 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2712 2713 /* map constraints_idxs in boundary numbering */ 2714 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2715 if (i != constraints_idxs_ptr[total_counts_cc]) { 2716 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2717 } 2718 2719 /* Create constraint matrix */ 2720 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2721 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2722 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2723 2724 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2725 /* determine if a QR strategy is needed for change of basis */ 2726 qr_needed = PETSC_FALSE; 2727 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2728 total_primal_vertices=0; 2729 pcbddc->local_primal_size_cc = 0; 2730 for (i=0;i<total_counts_cc;i++) { 2731 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2732 if (size_of_constraint == 1) { 2733 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2734 pcbddc->local_primal_size_cc += 1; 2735 } else if (PetscBTLookup(change_basis,i)) { 2736 for (k=0;k<constraints_n[i];k++) { 2737 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2738 } 2739 pcbddc->local_primal_size_cc += constraints_n[i]; 2740 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2741 PetscBTSet(qr_needed_idx,i); 2742 qr_needed = PETSC_TRUE; 2743 } 2744 } else { 2745 pcbddc->local_primal_size_cc += 1; 2746 } 2747 } 2748 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2749 pcbddc->n_vertices = total_primal_vertices; 2750 /* permute indices in order to have a sorted set of vertices */ 2751 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2752 2753 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2754 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2755 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2756 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2757 2758 /* nonzero structure of constraint matrix */ 2759 /* and get reference dof for local constraints */ 2760 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2761 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2762 2763 j = total_primal_vertices; 2764 total_counts = total_primal_vertices; 2765 cum = total_primal_vertices; 2766 for (i=n_vertices;i<total_counts_cc;i++) { 2767 if (!PetscBTLookup(change_basis,i)) { 2768 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2769 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2770 cum++; 2771 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2772 for (k=0;k<constraints_n[i];k++) { 2773 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2774 nnz[j+k] = size_of_constraint; 2775 } 2776 j += constraints_n[i]; 2777 } 2778 } 2779 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2780 ierr = PetscFree(nnz);CHKERRQ(ierr); 2781 2782 /* set values in constraint matrix */ 2783 for (i=0;i<total_primal_vertices;i++) { 2784 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2785 } 2786 total_counts = total_primal_vertices; 2787 for (i=n_vertices;i<total_counts_cc;i++) { 2788 if (!PetscBTLookup(change_basis,i)) { 2789 PetscInt *cols; 2790 2791 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2792 cols = constraints_idxs+constraints_idxs_ptr[i]; 2793 for (k=0;k<constraints_n[i];k++) { 2794 PetscInt row = total_counts+k; 2795 PetscScalar *vals; 2796 2797 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2798 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2799 } 2800 total_counts += constraints_n[i]; 2801 } 2802 } 2803 /* assembling */ 2804 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2805 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2806 2807 /* 2808 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2809 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2810 */ 2811 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2812 if (pcbddc->use_change_of_basis) { 2813 /* dual and primal dofs on a single cc */ 2814 PetscInt dual_dofs,primal_dofs; 2815 /* working stuff for GEQRF */ 2816 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2817 PetscBLASInt lqr_work; 2818 /* working stuff for UNGQR */ 2819 PetscScalar *gqr_work,lgqr_work_t; 2820 PetscBLASInt lgqr_work; 2821 /* working stuff for TRTRS */ 2822 PetscScalar *trs_rhs; 2823 PetscBLASInt Blas_NRHS; 2824 /* pointers for values insertion into change of basis matrix */ 2825 PetscInt *start_rows,*start_cols; 2826 PetscScalar *start_vals; 2827 /* working stuff for values insertion */ 2828 PetscBT is_primal; 2829 PetscInt *aux_primal_numbering_B; 2830 /* matrix sizes */ 2831 PetscInt global_size,local_size; 2832 /* temporary change of basis */ 2833 Mat localChangeOfBasisMatrix; 2834 /* extra space for debugging */ 2835 PetscScalar *dbg_work; 2836 2837 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2838 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2839 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2840 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2841 /* nonzeros for local mat */ 2842 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2843 for (i=0;i<pcis->n;i++) nnz[i]=1; 2844 for (i=n_vertices;i<total_counts_cc;i++) { 2845 if (PetscBTLookup(change_basis,i)) { 2846 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2847 if (PetscBTLookup(qr_needed_idx,i)) { 2848 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2849 } else { 2850 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2851 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2852 } 2853 } 2854 } 2855 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2856 ierr = PetscFree(nnz);CHKERRQ(ierr); 2857 /* Set initial identity in the matrix */ 2858 for (i=0;i<pcis->n;i++) { 2859 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2860 } 2861 2862 if (pcbddc->dbg_flag) { 2863 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2864 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2865 } 2866 2867 2868 /* Now we loop on the constraints which need a change of basis */ 2869 /* 2870 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2871 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2872 2873 Basic blocks of change of basis matrix T computed by 2874 2875 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2876 2877 | 1 0 ... 0 s_1/S | 2878 | 0 1 ... 0 s_2/S | 2879 | ... | 2880 | 0 ... 1 s_{n-1}/S | 2881 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2882 2883 with S = \sum_{i=1}^n s_i^2 2884 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2885 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2886 2887 - QR decomposition of constraints otherwise 2888 */ 2889 if (qr_needed) { 2890 /* space to store Q */ 2891 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2892 /* first we issue queries for optimal work */ 2893 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2894 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2895 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2896 lqr_work = -1; 2897 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2898 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2899 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2900 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2901 lgqr_work = -1; 2902 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2903 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2904 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2905 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2906 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2907 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2908 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2909 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2910 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2911 /* array to store scaling factors for reflectors */ 2912 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2913 /* array to store rhs and solution of triangular solver */ 2914 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2915 /* allocating workspace for check */ 2916 if (pcbddc->dbg_flag) { 2917 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2918 } 2919 } 2920 /* array to store whether a node is primal or not */ 2921 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2922 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2923 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2924 if (i != total_primal_vertices) { 2925 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2926 } 2927 for (i=0;i<total_primal_vertices;i++) { 2928 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2929 } 2930 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2931 2932 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2933 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2934 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2935 if (PetscBTLookup(change_basis,total_counts)) { 2936 /* get constraint info */ 2937 primal_dofs = constraints_n[total_counts]; 2938 dual_dofs = size_of_constraint-primal_dofs; 2939 2940 if (pcbddc->dbg_flag) { 2941 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2942 } 2943 2944 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2945 2946 /* copy quadrature constraints for change of basis check */ 2947 if (pcbddc->dbg_flag) { 2948 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2949 } 2950 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2951 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2952 2953 /* compute QR decomposition of constraints */ 2954 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2955 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2957 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2958 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2959 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2960 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2961 2962 /* explictly compute R^-T */ 2963 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2964 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2965 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2966 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2968 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2969 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2970 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2971 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2972 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2973 2974 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2975 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2977 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2978 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2979 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2980 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2981 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2982 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2983 2984 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2985 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2986 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2987 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2988 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2989 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2990 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2991 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2993 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2994 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2995 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2996 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2997 2998 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2999 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3000 /* insert cols for primal dofs */ 3001 for (j=0;j<primal_dofs;j++) { 3002 start_vals = &qr_basis[j*size_of_constraint]; 3003 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3004 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3005 } 3006 /* insert cols for dual dofs */ 3007 for (j=0,k=0;j<dual_dofs;k++) { 3008 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3009 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3010 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3011 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3012 j++; 3013 } 3014 } 3015 3016 /* check change of basis */ 3017 if (pcbddc->dbg_flag) { 3018 PetscInt ii,jj; 3019 PetscBool valid_qr=PETSC_TRUE; 3020 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3021 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3022 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3023 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3024 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3025 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3026 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3027 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3028 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3029 for (jj=0;jj<size_of_constraint;jj++) { 3030 for (ii=0;ii<primal_dofs;ii++) { 3031 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3032 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3033 } 3034 } 3035 if (!valid_qr) { 3036 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3037 for (jj=0;jj<size_of_constraint;jj++) { 3038 for (ii=0;ii<primal_dofs;ii++) { 3039 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3040 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3041 } 3042 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3043 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3044 } 3045 } 3046 } 3047 } else { 3048 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3049 } 3050 } 3051 } else { /* simple transformation block */ 3052 PetscInt row,col; 3053 PetscScalar val,norm; 3054 3055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3056 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3057 for (j=0;j<size_of_constraint;j++) { 3058 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3059 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3060 if (!PetscBTLookup(is_primal,row_B)) { 3061 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3062 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3063 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3064 } else { 3065 for (k=0;k<size_of_constraint;k++) { 3066 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3067 if (row != col) { 3068 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3069 } else { 3070 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3071 } 3072 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3073 } 3074 } 3075 } 3076 if (pcbddc->dbg_flag) { 3077 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3078 } 3079 } 3080 } else { 3081 if (pcbddc->dbg_flag) { 3082 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3083 } 3084 } 3085 } 3086 3087 /* free workspace */ 3088 if (qr_needed) { 3089 if (pcbddc->dbg_flag) { 3090 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3091 } 3092 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3093 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3094 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3095 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3096 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3097 } 3098 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3099 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3100 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3101 3102 /* assembling of global change of variable */ 3103 { 3104 Mat tmat; 3105 PetscInt bs; 3106 3107 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3108 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3109 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3110 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3111 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3112 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3113 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3114 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3115 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3116 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3117 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3118 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3119 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3120 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3121 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3122 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3123 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3124 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3125 } 3126 /* check */ 3127 if (pcbddc->dbg_flag) { 3128 PetscReal error; 3129 Vec x,x_change; 3130 3131 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3132 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3133 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3134 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3135 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3136 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3137 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3138 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3139 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3140 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3141 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3142 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3143 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3144 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3145 ierr = VecDestroy(&x);CHKERRQ(ierr); 3146 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3147 } 3148 3149 /* adapt sub_schurs computed (if any) */ 3150 if (pcbddc->use_deluxe_scaling) { 3151 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3152 if (sub_schurs->S_Ej_all) { 3153 Mat S_new,tmat; 3154 IS is_all_N; 3155 3156 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3157 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3158 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3159 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3160 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3161 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3162 sub_schurs->S_Ej_all = S_new; 3163 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3164 if (sub_schurs->sum_S_Ej_all) { 3165 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3166 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3167 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3168 sub_schurs->sum_S_Ej_all = S_new; 3169 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3170 } 3171 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3172 } 3173 } 3174 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3175 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3176 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3177 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3178 } 3179 3180 /* set up change of basis context */ 3181 if (pcbddc->ChangeOfBasisMatrix) { 3182 PCBDDCChange_ctx change_ctx; 3183 3184 if (!pcbddc->new_global_mat) { 3185 PetscInt global_size,local_size; 3186 3187 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3188 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3189 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3190 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3191 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3192 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3193 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3194 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3195 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3196 } else { 3197 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3198 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3199 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3200 } 3201 if (!pcbddc->user_ChangeOfBasisMatrix) { 3202 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3203 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3204 } else { 3205 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3206 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3207 } 3208 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3209 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3210 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3211 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3212 } 3213 3214 /* add pressure dof to set of primal nodes for numbering purposes */ 3215 if (pcbddc->zerodiag) { 3216 const PetscInt *idxs; 3217 PetscInt nz; 3218 3219 ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr); 3220 ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr); 3221 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = idxs[nz-1]; 3222 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = idxs[nz-1]; 3223 ierr = ISRestoreIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr); 3224 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3225 pcbddc->local_primal_size_cc++; 3226 pcbddc->local_primal_size++; 3227 } 3228 3229 /* check if a new primal space has been introduced (also take into account benign trick) */ 3230 pcbddc->new_primal_space_local = PETSC_TRUE; 3231 if (olocal_primal_size == pcbddc->local_primal_size) { 3232 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3233 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3234 if (!pcbddc->new_primal_space_local) { 3235 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3236 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3237 } 3238 } 3239 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3240 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3241 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3242 3243 /* flush dbg viewer */ 3244 if (pcbddc->dbg_flag) { 3245 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3246 } 3247 3248 /* free workspace */ 3249 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3250 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3251 if (!pcbddc->adaptive_selection) { 3252 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3253 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3254 } else { 3255 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3256 pcbddc->adaptive_constraints_idxs_ptr, 3257 pcbddc->adaptive_constraints_data_ptr, 3258 pcbddc->adaptive_constraints_idxs, 3259 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3260 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3261 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3262 } 3263 PetscFunctionReturn(0); 3264 } 3265 3266 #undef __FUNCT__ 3267 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3268 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3269 { 3270 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3271 PC_IS *pcis = (PC_IS*)pc->data; 3272 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3273 PetscInt ierr,i,vertex_size,N; 3274 PetscViewer viewer=pcbddc->dbg_viewer; 3275 3276 PetscFunctionBegin; 3277 /* Reset previously computed graph */ 3278 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3279 /* Init local Graph struct */ 3280 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3281 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3282 3283 /* Check validity of the csr graph passed in by the user */ 3284 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3285 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3286 } 3287 3288 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3289 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3290 PetscInt *xadj,*adjncy; 3291 PetscInt nvtxs; 3292 PetscBool flg_row=PETSC_FALSE; 3293 3294 if (pcbddc->use_local_adj) { 3295 3296 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3297 if (flg_row) { 3298 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3299 pcbddc->computed_rowadj = PETSC_TRUE; 3300 } 3301 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3302 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3303 IS is_dummy; 3304 ISLocalToGlobalMapping l2gmap_dummy; 3305 PetscInt j,sum; 3306 PetscInt *cxadj,*cadjncy; 3307 const PetscInt *idxs; 3308 PCBDDCGraph graph; 3309 PetscBT is_on_boundary; 3310 3311 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3312 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3313 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3314 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3315 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3316 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3317 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3318 if (flg_row) { 3319 graph->xadj = xadj; 3320 graph->adjncy = adjncy; 3321 } 3322 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3323 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3324 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3325 3326 if (pcbddc->dbg_flag) { 3327 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3328 for (i=0;i<graph->ncc;i++) { 3329 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3330 } 3331 } 3332 3333 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3334 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3335 for (i=0;i<pcis->n_B;i++) { 3336 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3337 } 3338 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3339 3340 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3341 sum = 0; 3342 for (i=0;i<graph->ncc;i++) { 3343 PetscInt sizecc = 0; 3344 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3345 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3346 sizecc++; 3347 } 3348 } 3349 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3350 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3351 cxadj[graph->queue[j]] = sizecc; 3352 } 3353 } 3354 sum += sizecc*sizecc; 3355 } 3356 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3357 sum = 0; 3358 for (i=0;i<pcis->n;i++) { 3359 PetscInt temp = cxadj[i]; 3360 cxadj[i] = sum; 3361 sum += temp; 3362 } 3363 cxadj[pcis->n] = sum; 3364 for (i=0;i<graph->ncc;i++) { 3365 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3366 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3367 PetscInt k,sizecc = 0; 3368 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3369 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3370 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3371 sizecc++; 3372 } 3373 } 3374 } 3375 } 3376 } 3377 if (sum) { 3378 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3379 } else { 3380 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3381 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3382 } 3383 graph->xadj = 0; 3384 graph->adjncy = 0; 3385 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3386 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3387 } 3388 } 3389 if (pcbddc->dbg_flag) { 3390 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3391 } 3392 3393 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3394 vertex_size = 1; 3395 if (pcbddc->user_provided_isfordofs) { 3396 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3397 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3398 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3399 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3400 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3401 } 3402 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3403 pcbddc->n_ISForDofs = 0; 3404 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3405 } 3406 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3407 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3408 } else { 3409 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3410 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3411 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3412 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3413 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3414 } 3415 } 3416 } 3417 3418 /* Setup of Graph */ 3419 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3420 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3421 } 3422 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3423 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3424 } 3425 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3426 3427 /* Graph's connected components analysis */ 3428 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3429 3430 /* print some info to stdout */ 3431 if (pcbddc->dbg_flag) { 3432 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3433 } 3434 3435 /* mark topography has done */ 3436 pcbddc->recompute_topography = PETSC_FALSE; 3437 PetscFunctionReturn(0); 3438 } 3439 3440 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3441 #undef __FUNCT__ 3442 #define __FUNCT__ "PCBDDCSubsetNumbering" 3443 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3444 { 3445 PetscSF sf; 3446 PetscLayout map; 3447 const PetscInt *idxs; 3448 PetscInt *leaf_data,*root_data,*gidxs; 3449 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3450 PetscInt n_n,nlocals,start,first_index; 3451 PetscMPIInt commsize; 3452 PetscBool first_found; 3453 PetscErrorCode ierr; 3454 3455 PetscFunctionBegin; 3456 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3457 if (subset_mult) { 3458 PetscCheckSameComm(subset,1,subset_mult,2); 3459 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3460 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3461 } 3462 /* create workspace layout for computing global indices of subset */ 3463 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3464 lbounds[0] = lbounds[1] = 0; 3465 for (i=0;i<n;i++) { 3466 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3467 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3468 } 3469 lbounds[0] = -lbounds[0]; 3470 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3471 gbounds[0] = -gbounds[0]; 3472 N = gbounds[1] - gbounds[0] + 1; 3473 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3474 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3475 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3476 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3477 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3478 3479 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3480 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3481 if (subset_mult) { 3482 const PetscInt* idxs_mult; 3483 3484 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3485 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3486 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3487 } else { 3488 for (i=0;i<n;i++) leaf_data[i] = 1; 3489 } 3490 /* local size of new subset */ 3491 n_n = 0; 3492 for (i=0;i<n;i++) n_n += leaf_data[i]; 3493 3494 /* global indexes in layout */ 3495 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3496 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3497 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3498 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3499 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3500 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3501 3502 /* reduce from leaves to roots */ 3503 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3504 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3505 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3506 3507 /* count indexes in local part of layout */ 3508 nlocals = 0; 3509 first_index = -1; 3510 first_found = PETSC_FALSE; 3511 for (i=0;i<Nl;i++) { 3512 if (!first_found && root_data[i]) { 3513 first_found = PETSC_TRUE; 3514 first_index = i; 3515 } 3516 nlocals += root_data[i]; 3517 } 3518 3519 /* cumulative of number of indexes and size of subset without holes */ 3520 #if defined(PETSC_HAVE_MPI_EXSCAN) 3521 start = 0; 3522 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3523 #else 3524 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3525 start = start-nlocals; 3526 #endif 3527 3528 if (N_n) { /* compute total size of new subset if requested */ 3529 *N_n = start + nlocals; 3530 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3531 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3532 } 3533 3534 /* adapt root data with cumulative */ 3535 if (first_found) { 3536 PetscInt old_index; 3537 3538 root_data[first_index] += start; 3539 old_index = first_index; 3540 for (i=first_index+1;i<Nl;i++) { 3541 if (root_data[i]) { 3542 root_data[i] += root_data[old_index]; 3543 old_index = i; 3544 } 3545 } 3546 } 3547 3548 /* from roots to leaves */ 3549 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3550 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3551 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3552 3553 /* create new IS with global indexes without holes */ 3554 if (subset_mult) { 3555 const PetscInt* idxs_mult; 3556 PetscInt cum; 3557 3558 cum = 0; 3559 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3560 for (i=0;i<n;i++) { 3561 PetscInt j; 3562 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3563 } 3564 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3565 } else { 3566 for (i=0;i<n;i++) { 3567 gidxs[i] = leaf_data[i]-1; 3568 } 3569 } 3570 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3571 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3572 PetscFunctionReturn(0); 3573 } 3574 3575 #undef __FUNCT__ 3576 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3577 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3578 { 3579 PetscInt i,j; 3580 PetscScalar *alphas; 3581 PetscErrorCode ierr; 3582 3583 PetscFunctionBegin; 3584 /* this implements stabilized Gram-Schmidt */ 3585 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3586 for (i=0;i<n;i++) { 3587 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3588 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3589 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3590 } 3591 ierr = PetscFree(alphas);CHKERRQ(ierr); 3592 PetscFunctionReturn(0); 3593 } 3594 3595 #undef __FUNCT__ 3596 #define __FUNCT__ "MatISGetSubassemblingPattern" 3597 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3598 { 3599 IS ranks_send_to; 3600 PetscInt n_neighs,*neighs,*n_shared,**shared; 3601 PetscMPIInt size,rank,color; 3602 PetscInt *xadj,*adjncy; 3603 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3604 PetscInt i,local_size,threshold=0; 3605 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3606 PetscSubcomm subcomm; 3607 PetscErrorCode ierr; 3608 3609 PetscFunctionBegin; 3610 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3611 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3612 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3613 3614 /* Get info on mapping */ 3615 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3616 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3617 3618 /* build local CSR graph of subdomains' connectivity */ 3619 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3620 xadj[0] = 0; 3621 xadj[1] = PetscMax(n_neighs-1,0); 3622 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3623 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3624 3625 if (threshold) { 3626 PetscInt xadj_count = 0; 3627 for (i=1;i<n_neighs;i++) { 3628 if (n_shared[i] > threshold) { 3629 adjncy[xadj_count] = neighs[i]; 3630 adjncy_wgt[xadj_count] = n_shared[i]; 3631 xadj_count++; 3632 } 3633 } 3634 xadj[1] = xadj_count; 3635 } else { 3636 if (xadj[1]) { 3637 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3638 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3639 } 3640 } 3641 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3642 if (use_square) { 3643 for (i=0;i<xadj[1];i++) { 3644 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3645 } 3646 } 3647 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3648 3649 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3650 3651 /* 3652 Restrict work on active processes only. 3653 */ 3654 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3655 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3656 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3657 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3658 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3659 if (color) { 3660 ierr = PetscFree(xadj);CHKERRQ(ierr); 3661 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3662 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3663 } else { 3664 Mat subdomain_adj; 3665 IS new_ranks,new_ranks_contig; 3666 MatPartitioning partitioner; 3667 PetscInt prank,rstart=0,rend=0; 3668 PetscInt *is_indices,*oldranks; 3669 PetscBool aggregate; 3670 3671 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3672 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3673 prank = rank; 3674 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3675 /* 3676 for (i=0;i<size;i++) { 3677 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3678 } 3679 */ 3680 for (i=0;i<xadj[1];i++) { 3681 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3682 } 3683 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3684 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3685 if (aggregate) { 3686 PetscInt lrows,row,ncols,*cols; 3687 PetscMPIInt nrank; 3688 PetscScalar *vals; 3689 3690 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3691 lrows = 0; 3692 if (nrank<redprocs) { 3693 lrows = size/redprocs; 3694 if (nrank<size%redprocs) lrows++; 3695 } 3696 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3697 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3698 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3699 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3700 row = nrank; 3701 ncols = xadj[1]-xadj[0]; 3702 cols = adjncy; 3703 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3704 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3705 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3706 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3707 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3708 ierr = PetscFree(xadj);CHKERRQ(ierr); 3709 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3710 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3711 ierr = PetscFree(vals);CHKERRQ(ierr); 3712 } else { 3713 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3714 } 3715 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3716 3717 /* Partition */ 3718 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3719 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3720 if (use_vwgt) { 3721 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3722 v_wgt[0] = local_size; 3723 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3724 } 3725 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3726 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3727 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3728 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3729 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3730 3731 /* renumber new_ranks to avoid "holes" in new set of processors */ 3732 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3733 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3734 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3735 if (!redprocs) { 3736 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3737 } else { 3738 PetscInt idxs[1]; 3739 PetscMPIInt tag; 3740 MPI_Request *reqs; 3741 3742 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3743 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3744 for (i=rstart;i<rend;i++) { 3745 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3746 } 3747 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3748 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3749 ierr = PetscFree(reqs);CHKERRQ(ierr); 3750 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3751 } 3752 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3753 /* clean up */ 3754 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3755 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3756 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3757 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3758 } 3759 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3760 3761 /* assemble parallel IS for sends */ 3762 i = 1; 3763 if (color) i=0; 3764 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3765 /* get back IS */ 3766 *is_sends = ranks_send_to; 3767 PetscFunctionReturn(0); 3768 } 3769 3770 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3771 3772 #undef __FUNCT__ 3773 #define __FUNCT__ "MatISSubassemble" 3774 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3775 { 3776 Mat local_mat; 3777 IS is_sends_internal; 3778 PetscInt rows,cols,new_local_rows; 3779 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3780 PetscBool ismatis,isdense,newisdense,destroy_mat; 3781 ISLocalToGlobalMapping l2gmap; 3782 PetscInt* l2gmap_indices; 3783 const PetscInt* is_indices; 3784 MatType new_local_type; 3785 /* buffers */ 3786 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3787 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3788 PetscInt *recv_buffer_idxs_local; 3789 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3790 /* MPI */ 3791 MPI_Comm comm,comm_n; 3792 PetscSubcomm subcomm; 3793 PetscMPIInt n_sends,n_recvs,commsize; 3794 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3795 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3796 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3797 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3798 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3799 PetscErrorCode ierr; 3800 3801 PetscFunctionBegin; 3802 /* TODO: add missing checks */ 3803 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3804 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3805 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3806 PetscValidLogicalCollectiveInt(mat,nis,7); 3807 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3808 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3809 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3810 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3811 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3812 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3813 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3814 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3815 PetscInt mrows,mcols,mnrows,mncols; 3816 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3817 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3818 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3819 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3820 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3821 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3822 } 3823 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3824 PetscValidLogicalCollectiveInt(mat,bs,0); 3825 /* prepare IS for sending if not provided */ 3826 if (!is_sends) { 3827 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3828 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3829 } else { 3830 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3831 is_sends_internal = is_sends; 3832 } 3833 3834 /* get comm */ 3835 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3836 3837 /* compute number of sends */ 3838 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3839 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3840 3841 /* compute number of receives */ 3842 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3843 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3844 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3845 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3846 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3847 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3848 ierr = PetscFree(iflags);CHKERRQ(ierr); 3849 3850 /* restrict comm if requested */ 3851 subcomm = 0; 3852 destroy_mat = PETSC_FALSE; 3853 if (restrict_comm) { 3854 PetscMPIInt color,subcommsize; 3855 3856 color = 0; 3857 if (restrict_full) { 3858 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3859 } else { 3860 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3861 } 3862 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3863 subcommsize = commsize - subcommsize; 3864 /* check if reuse has been requested */ 3865 if (reuse == MAT_REUSE_MATRIX) { 3866 if (*mat_n) { 3867 PetscMPIInt subcommsize2; 3868 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3869 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3870 comm_n = PetscObjectComm((PetscObject)*mat_n); 3871 } else { 3872 comm_n = PETSC_COMM_SELF; 3873 } 3874 } else { /* MAT_INITIAL_MATRIX */ 3875 PetscMPIInt rank; 3876 3877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3878 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3879 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3880 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3881 comm_n = PetscSubcommChild(subcomm); 3882 } 3883 /* flag to destroy *mat_n if not significative */ 3884 if (color) destroy_mat = PETSC_TRUE; 3885 } else { 3886 comm_n = comm; 3887 } 3888 3889 /* prepare send/receive buffers */ 3890 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3891 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3892 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3893 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3894 if (nis) { 3895 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3896 } 3897 3898 /* Get data from local matrices */ 3899 if (!isdense) { 3900 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3901 /* TODO: See below some guidelines on how to prepare the local buffers */ 3902 /* 3903 send_buffer_vals should contain the raw values of the local matrix 3904 send_buffer_idxs should contain: 3905 - MatType_PRIVATE type 3906 - PetscInt size_of_l2gmap 3907 - PetscInt global_row_indices[size_of_l2gmap] 3908 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3909 */ 3910 } else { 3911 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3912 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3913 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3914 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3915 send_buffer_idxs[1] = i; 3916 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3917 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3918 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3919 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3920 for (i=0;i<n_sends;i++) { 3921 ilengths_vals[is_indices[i]] = len*len; 3922 ilengths_idxs[is_indices[i]] = len+2; 3923 } 3924 } 3925 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3926 /* additional is (if any) */ 3927 if (nis) { 3928 PetscMPIInt psum; 3929 PetscInt j; 3930 for (j=0,psum=0;j<nis;j++) { 3931 PetscInt plen; 3932 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3933 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3934 psum += len+1; /* indices + lenght */ 3935 } 3936 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3937 for (j=0,psum=0;j<nis;j++) { 3938 PetscInt plen; 3939 const PetscInt *is_array_idxs; 3940 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3941 send_buffer_idxs_is[psum] = plen; 3942 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3943 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3944 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3945 psum += plen+1; /* indices + lenght */ 3946 } 3947 for (i=0;i<n_sends;i++) { 3948 ilengths_idxs_is[is_indices[i]] = psum; 3949 } 3950 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3951 } 3952 3953 buf_size_idxs = 0; 3954 buf_size_vals = 0; 3955 buf_size_idxs_is = 0; 3956 for (i=0;i<n_recvs;i++) { 3957 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3958 buf_size_vals += (PetscInt)olengths_vals[i]; 3959 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3960 } 3961 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3962 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3963 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3964 3965 /* get new tags for clean communications */ 3966 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3967 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3968 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3969 3970 /* allocate for requests */ 3971 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3972 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3973 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3974 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3975 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3976 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3977 3978 /* communications */ 3979 ptr_idxs = recv_buffer_idxs; 3980 ptr_vals = recv_buffer_vals; 3981 ptr_idxs_is = recv_buffer_idxs_is; 3982 for (i=0;i<n_recvs;i++) { 3983 source_dest = onodes[i]; 3984 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3985 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3986 ptr_idxs += olengths_idxs[i]; 3987 ptr_vals += olengths_vals[i]; 3988 if (nis) { 3989 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3990 ptr_idxs_is += olengths_idxs_is[i]; 3991 } 3992 } 3993 for (i=0;i<n_sends;i++) { 3994 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3995 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3996 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3997 if (nis) { 3998 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3999 } 4000 } 4001 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4002 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4003 4004 /* assemble new l2g map */ 4005 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4006 ptr_idxs = recv_buffer_idxs; 4007 new_local_rows = 0; 4008 for (i=0;i<n_recvs;i++) { 4009 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4010 ptr_idxs += olengths_idxs[i]; 4011 } 4012 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4013 ptr_idxs = recv_buffer_idxs; 4014 new_local_rows = 0; 4015 for (i=0;i<n_recvs;i++) { 4016 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4017 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4018 ptr_idxs += olengths_idxs[i]; 4019 } 4020 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4021 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4022 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4023 4024 /* infer new local matrix type from received local matrices type */ 4025 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4026 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4027 if (n_recvs) { 4028 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4029 ptr_idxs = recv_buffer_idxs; 4030 for (i=0;i<n_recvs;i++) { 4031 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4032 new_local_type_private = MATAIJ_PRIVATE; 4033 break; 4034 } 4035 ptr_idxs += olengths_idxs[i]; 4036 } 4037 switch (new_local_type_private) { 4038 case MATDENSE_PRIVATE: 4039 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4040 new_local_type = MATSEQAIJ; 4041 bs = 1; 4042 } else { /* if I receive only 1 dense matrix */ 4043 new_local_type = MATSEQDENSE; 4044 bs = 1; 4045 } 4046 break; 4047 case MATAIJ_PRIVATE: 4048 new_local_type = MATSEQAIJ; 4049 bs = 1; 4050 break; 4051 case MATBAIJ_PRIVATE: 4052 new_local_type = MATSEQBAIJ; 4053 break; 4054 case MATSBAIJ_PRIVATE: 4055 new_local_type = MATSEQSBAIJ; 4056 break; 4057 default: 4058 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4059 break; 4060 } 4061 } else { /* by default, new_local_type is seqdense */ 4062 new_local_type = MATSEQDENSE; 4063 bs = 1; 4064 } 4065 4066 /* create MATIS object if needed */ 4067 if (reuse == MAT_INITIAL_MATRIX) { 4068 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4069 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4070 } else { 4071 /* it also destroys the local matrices */ 4072 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4073 } 4074 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4075 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4076 4077 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4078 4079 /* Global to local map of received indices */ 4080 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4081 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4082 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4083 4084 /* restore attributes -> type of incoming data and its size */ 4085 buf_size_idxs = 0; 4086 for (i=0;i<n_recvs;i++) { 4087 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4088 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4089 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4090 } 4091 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4092 4093 /* set preallocation */ 4094 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4095 if (!newisdense) { 4096 PetscInt *new_local_nnz=0; 4097 4098 ptr_vals = recv_buffer_vals; 4099 ptr_idxs = recv_buffer_idxs_local; 4100 if (n_recvs) { 4101 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4102 } 4103 for (i=0;i<n_recvs;i++) { 4104 PetscInt j; 4105 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4106 for (j=0;j<*(ptr_idxs+1);j++) { 4107 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4108 } 4109 } else { 4110 /* TODO */ 4111 } 4112 ptr_idxs += olengths_idxs[i]; 4113 } 4114 if (new_local_nnz) { 4115 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4116 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4117 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4118 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4119 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4120 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4121 } else { 4122 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4123 } 4124 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4125 } else { 4126 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4127 } 4128 4129 /* set values */ 4130 ptr_vals = recv_buffer_vals; 4131 ptr_idxs = recv_buffer_idxs_local; 4132 for (i=0;i<n_recvs;i++) { 4133 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4134 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4135 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4136 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4137 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4138 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4139 } else { 4140 /* TODO */ 4141 } 4142 ptr_idxs += olengths_idxs[i]; 4143 ptr_vals += olengths_vals[i]; 4144 } 4145 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4146 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4147 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4148 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4150 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4151 4152 #if 0 4153 if (!restrict_comm) { /* check */ 4154 Vec lvec,rvec; 4155 PetscReal infty_error; 4156 4157 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4158 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4159 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4160 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4161 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4162 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4163 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4164 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4165 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4166 } 4167 #endif 4168 4169 /* assemble new additional is (if any) */ 4170 if (nis) { 4171 PetscInt **temp_idxs,*count_is,j,psum; 4172 4173 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4174 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4175 ptr_idxs = recv_buffer_idxs_is; 4176 psum = 0; 4177 for (i=0;i<n_recvs;i++) { 4178 for (j=0;j<nis;j++) { 4179 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4180 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4181 psum += plen; 4182 ptr_idxs += plen+1; /* shift pointer to received data */ 4183 } 4184 } 4185 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4186 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4187 for (i=1;i<nis;i++) { 4188 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4189 } 4190 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4191 ptr_idxs = recv_buffer_idxs_is; 4192 for (i=0;i<n_recvs;i++) { 4193 for (j=0;j<nis;j++) { 4194 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4195 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4196 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4197 ptr_idxs += plen+1; /* shift pointer to received data */ 4198 } 4199 } 4200 for (i=0;i<nis;i++) { 4201 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4202 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4203 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4204 } 4205 ierr = PetscFree(count_is);CHKERRQ(ierr); 4206 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4207 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4208 } 4209 /* free workspace */ 4210 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4211 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4212 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4213 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4214 if (isdense) { 4215 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4216 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4217 } else { 4218 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4219 } 4220 if (nis) { 4221 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4222 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4223 } 4224 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4225 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4226 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4227 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4228 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4229 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4230 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4231 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4232 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4233 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4234 ierr = PetscFree(onodes);CHKERRQ(ierr); 4235 if (nis) { 4236 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4237 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4238 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4239 } 4240 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4241 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4242 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4243 for (i=0;i<nis;i++) { 4244 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4245 } 4246 *mat_n = NULL; 4247 } 4248 PetscFunctionReturn(0); 4249 } 4250 4251 /* temporary hack into ksp private data structure */ 4252 #include <petsc/private/kspimpl.h> 4253 4254 #undef __FUNCT__ 4255 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4256 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4257 { 4258 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4259 PC_IS *pcis = (PC_IS*)pc->data; 4260 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4261 MatNullSpace CoarseNullSpace=NULL; 4262 ISLocalToGlobalMapping coarse_islg; 4263 IS coarse_is,*isarray; 4264 PetscInt i,im_active=-1,active_procs=-1; 4265 PetscInt nis,nisdofs,nisneu; 4266 PC pc_temp; 4267 PCType coarse_pc_type; 4268 KSPType coarse_ksp_type; 4269 PetscBool multilevel_requested,multilevel_allowed; 4270 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4271 Mat t_coarse_mat_is; 4272 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4273 PetscMPIInt all_procs; 4274 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4275 PetscBool compute_vecs = PETSC_FALSE; 4276 PetscScalar *array; 4277 PetscErrorCode ierr; 4278 4279 PetscFunctionBegin; 4280 /* Assign global numbering to coarse dofs */ 4281 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4282 PetscInt ocoarse_size; 4283 compute_vecs = PETSC_TRUE; 4284 ocoarse_size = pcbddc->coarse_size; 4285 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4286 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4287 /* see if we can avoid some work */ 4288 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4289 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4290 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4291 PC pc; 4292 PetscBool isbddc; 4293 4294 /* temporary workaround since PCBDDC does not have a reset method so far */ 4295 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4296 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4297 if (isbddc) { 4298 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4299 } 4300 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4301 coarse_reuse = PETSC_FALSE; 4302 } else { /* we can safely reuse already computed coarse matrix */ 4303 coarse_reuse = PETSC_TRUE; 4304 } 4305 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4306 coarse_reuse = PETSC_FALSE; 4307 } 4308 /* reset any subassembling information */ 4309 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4310 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4311 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4312 coarse_reuse = PETSC_TRUE; 4313 } 4314 4315 /* count "active" (i.e. with positive local size) and "void" processes */ 4316 im_active = !!(pcis->n); 4317 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4318 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4319 void_procs = all_procs-active_procs; 4320 csin_type_simple = PETSC_TRUE; 4321 redist = PETSC_FALSE; 4322 if (pcbddc->current_level && void_procs) { 4323 csin_ml = PETSC_TRUE; 4324 ncoarse_ml = void_procs; 4325 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4326 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4327 csin_ds = PETSC_TRUE; 4328 ncoarse_ds = pcbddc->redistribute_coarse; 4329 redist = PETSC_TRUE; 4330 } else { 4331 csin_ds = PETSC_TRUE; 4332 ncoarse_ds = active_procs; 4333 redist = PETSC_TRUE; 4334 } 4335 } else { 4336 csin_ml = PETSC_FALSE; 4337 ncoarse_ml = all_procs; 4338 if (void_procs) { 4339 csin_ds = PETSC_TRUE; 4340 ncoarse_ds = void_procs; 4341 csin_type_simple = PETSC_FALSE; 4342 } else { 4343 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4344 csin_ds = PETSC_TRUE; 4345 ncoarse_ds = pcbddc->redistribute_coarse; 4346 redist = PETSC_TRUE; 4347 } else { 4348 csin_ds = PETSC_FALSE; 4349 ncoarse_ds = all_procs; 4350 } 4351 } 4352 } 4353 4354 /* 4355 test if we can go multilevel: three conditions must be satisfied: 4356 - we have not exceeded the number of levels requested 4357 - we can actually subassemble the active processes 4358 - we can find a suitable number of MPI processes where we can place the subassembled problem 4359 */ 4360 multilevel_allowed = PETSC_FALSE; 4361 multilevel_requested = PETSC_FALSE; 4362 if (pcbddc->current_level < pcbddc->max_levels) { 4363 multilevel_requested = PETSC_TRUE; 4364 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4365 multilevel_allowed = PETSC_FALSE; 4366 } else { 4367 multilevel_allowed = PETSC_TRUE; 4368 } 4369 } 4370 /* determine number of process partecipating to coarse solver */ 4371 if (multilevel_allowed) { 4372 ncoarse = ncoarse_ml; 4373 csin = csin_ml; 4374 redist = PETSC_FALSE; 4375 } else { 4376 ncoarse = ncoarse_ds; 4377 csin = csin_ds; 4378 } 4379 4380 /* creates temporary l2gmap and IS for coarse indexes */ 4381 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4382 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4383 4384 /* creates temporary MATIS object for coarse matrix */ 4385 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4386 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4387 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4388 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4389 #if 0 4390 { 4391 PetscViewer viewer; 4392 char filename[256]; 4393 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4394 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4395 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4396 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4397 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4398 } 4399 #endif 4400 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4401 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4402 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4403 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4404 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4405 4406 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4407 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4408 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4409 const PetscInt *idxs; 4410 ISLocalToGlobalMapping tmap; 4411 4412 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4413 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4414 /* allocate space for temporary storage */ 4415 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4416 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4417 /* allocate for IS array */ 4418 nisdofs = pcbddc->n_ISForDofsLocal; 4419 nisneu = !!pcbddc->NeumannBoundariesLocal; 4420 nis = nisdofs + nisneu; 4421 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4422 /* dofs splitting */ 4423 for (i=0;i<nisdofs;i++) { 4424 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4425 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4426 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4427 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4428 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4429 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4430 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4431 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4432 } 4433 /* neumann boundaries */ 4434 if (pcbddc->NeumannBoundariesLocal) { 4435 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4436 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4437 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4438 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4439 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4440 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4441 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4442 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4443 } 4444 /* free memory */ 4445 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4446 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4447 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4448 } else { 4449 nis = 0; 4450 nisdofs = 0; 4451 nisneu = 0; 4452 isarray = NULL; 4453 } 4454 /* destroy no longer needed map */ 4455 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4456 4457 /* restrict on coarse candidates (if needed) */ 4458 coarse_mat_is = NULL; 4459 if (csin) { 4460 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4461 if (redist) { 4462 PetscMPIInt rank; 4463 PetscInt spc,n_spc_p1,dest[1],destsize; 4464 4465 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4466 spc = active_procs/ncoarse; 4467 n_spc_p1 = active_procs%ncoarse; 4468 if (im_active) { 4469 destsize = 1; 4470 if (rank > n_spc_p1*(spc+1)-1) { 4471 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4472 } else { 4473 dest[0] = rank/(spc+1); 4474 } 4475 } else { 4476 destsize = 0; 4477 } 4478 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4479 } else if (csin_type_simple) { 4480 PetscMPIInt rank; 4481 PetscInt issize,isidx; 4482 4483 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4484 if (im_active) { 4485 issize = 1; 4486 isidx = (PetscInt)rank; 4487 } else { 4488 issize = 0; 4489 isidx = -1; 4490 } 4491 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4492 } else { /* get a suitable subassembling pattern from MATIS code */ 4493 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4494 } 4495 4496 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4497 if (!redist || ncoarse <= void_procs) { 4498 PetscInt ncoarse_cand,tissize,*nisindices; 4499 PetscInt *coarse_candidates; 4500 const PetscInt* tisindices; 4501 4502 /* get coarse candidates' ranks in pc communicator */ 4503 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4504 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4505 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4506 if (!coarse_candidates[i]) { 4507 coarse_candidates[ncoarse_cand++]=i; 4508 } 4509 } 4510 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4511 4512 4513 if (pcbddc->dbg_flag) { 4514 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4515 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4516 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4517 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4518 for (i=0;i<ncoarse_cand;i++) { 4519 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4520 } 4521 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4522 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4523 } 4524 /* shift the pattern on coarse candidates */ 4525 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4526 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4527 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4528 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4529 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4530 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4531 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4532 } 4533 if (pcbddc->dbg_flag) { 4534 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4535 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4536 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4537 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4538 } 4539 } 4540 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4541 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4542 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4543 } else { /* this is the last level, so use just receiving processes in subcomm */ 4544 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4545 } 4546 } else { 4547 if (pcbddc->dbg_flag) { 4548 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4550 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4551 } 4552 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4553 coarse_mat_is = t_coarse_mat_is; 4554 } 4555 4556 /* create local to global scatters for coarse problem */ 4557 if (compute_vecs) { 4558 PetscInt lrows; 4559 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4560 if (coarse_mat_is) { 4561 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4562 } else { 4563 lrows = 0; 4564 } 4565 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4566 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4567 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4568 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4569 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4570 } 4571 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4572 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4573 4574 /* set defaults for coarse KSP and PC */ 4575 if (multilevel_allowed) { 4576 coarse_ksp_type = KSPRICHARDSON; 4577 coarse_pc_type = PCBDDC; 4578 } else { 4579 coarse_ksp_type = KSPPREONLY; 4580 coarse_pc_type = PCREDUNDANT; 4581 } 4582 4583 /* print some info if requested */ 4584 if (pcbddc->dbg_flag) { 4585 if (!multilevel_allowed) { 4586 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4587 if (multilevel_requested) { 4588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4589 } else if (pcbddc->max_levels) { 4590 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4591 } 4592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4593 } 4594 } 4595 4596 /* create the coarse KSP object only once with defaults */ 4597 if (coarse_mat_is) { 4598 MatReuse coarse_mat_reuse; 4599 PetscViewer dbg_viewer = NULL; 4600 if (pcbddc->dbg_flag) { 4601 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4602 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4603 } 4604 if (!pcbddc->coarse_ksp) { 4605 char prefix[256],str_level[16]; 4606 size_t len; 4607 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4608 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4609 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4610 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4611 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4612 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4613 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4614 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4615 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4616 /* prefix */ 4617 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4618 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4619 if (!pcbddc->current_level) { 4620 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4621 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4622 } else { 4623 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4624 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4625 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4626 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4627 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4628 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4629 } 4630 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4631 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4632 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4633 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4634 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4635 /* allow user customization */ 4636 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4637 } 4638 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4639 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4640 if (nisdofs) { 4641 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4642 for (i=0;i<nisdofs;i++) { 4643 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4644 } 4645 } 4646 if (nisneu) { 4647 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4648 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4649 } 4650 4651 /* get some info after set from options */ 4652 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4653 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4654 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4655 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4656 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4657 isbddc = PETSC_FALSE; 4658 } 4659 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4660 if (isredundant) { 4661 KSP inner_ksp; 4662 PC inner_pc; 4663 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4664 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4665 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4666 } 4667 4668 /* assemble coarse matrix */ 4669 if (coarse_reuse) { 4670 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4671 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4672 coarse_mat_reuse = MAT_REUSE_MATRIX; 4673 } else { 4674 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4675 } 4676 if (isbddc || isnn) { 4677 if (pcbddc->coarsening_ratio > 1) { 4678 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4679 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4680 if (pcbddc->dbg_flag) { 4681 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4682 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4683 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4684 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4685 } 4686 } 4687 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4688 } else { 4689 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4690 coarse_mat = coarse_mat_is; 4691 } 4692 } else { 4693 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4694 } 4695 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4696 4697 /* propagate symmetry info of coarse matrix */ 4698 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4699 if (pc->pmat->symmetric_set) { 4700 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4701 } 4702 if (pc->pmat->hermitian_set) { 4703 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4704 } 4705 if (pc->pmat->spd_set) { 4706 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4707 } 4708 /* set operators */ 4709 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4710 if (pcbddc->dbg_flag) { 4711 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4712 } 4713 } else { /* processes non partecipating to coarse solver (if any) */ 4714 coarse_mat = 0; 4715 } 4716 ierr = PetscFree(isarray);CHKERRQ(ierr); 4717 #if 0 4718 { 4719 PetscViewer viewer; 4720 char filename[256]; 4721 sprintf(filename,"coarse_mat.m"); 4722 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4723 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4724 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4725 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4726 } 4727 #endif 4728 4729 /* Compute coarse null space (special handling by BDDC only) */ 4730 #if 0 4731 if (pcbddc->NullSpace) { 4732 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4733 } 4734 #endif 4735 4736 if (pcbddc->coarse_ksp) { 4737 Vec crhs,csol; 4738 PetscBool ispreonly; 4739 4740 if (CoarseNullSpace) { 4741 if (isbddc) { 4742 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4743 } else { 4744 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4745 } 4746 } 4747 /* setup coarse ksp */ 4748 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4749 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4750 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4751 /* hack */ 4752 if (!csol) { 4753 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4754 } 4755 if (!crhs) { 4756 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4757 } 4758 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4759 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4760 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4761 KSP check_ksp; 4762 KSPType check_ksp_type; 4763 PC check_pc; 4764 Vec check_vec,coarse_vec; 4765 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4766 PetscInt its; 4767 PetscBool compute_eigs; 4768 PetscReal *eigs_r,*eigs_c; 4769 PetscInt neigs; 4770 const char *prefix; 4771 4772 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4773 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4774 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4775 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4776 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4777 if (ispreonly) { 4778 check_ksp_type = KSPPREONLY; 4779 compute_eigs = PETSC_FALSE; 4780 } else { 4781 check_ksp_type = KSPGMRES; 4782 compute_eigs = PETSC_TRUE; 4783 } 4784 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4785 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4786 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4787 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4788 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4789 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4790 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4791 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4792 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4793 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4794 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4795 /* create random vec */ 4796 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4797 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4798 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4799 if (CoarseNullSpace) { 4800 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4801 } 4802 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4803 /* solve coarse problem */ 4804 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4805 if (CoarseNullSpace) { 4806 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4807 } 4808 /* set eigenvalue estimation if preonly has not been requested */ 4809 if (compute_eigs) { 4810 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4811 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4812 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4813 lambda_max = eigs_r[neigs-1]; 4814 lambda_min = eigs_r[0]; 4815 if (pcbddc->use_coarse_estimates) { 4816 if (lambda_max>lambda_min) { 4817 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4818 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4819 } 4820 } 4821 } 4822 4823 /* check coarse problem residual error */ 4824 if (pcbddc->dbg_flag) { 4825 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4826 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4827 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4828 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4829 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4830 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4831 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4832 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4833 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4834 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4835 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4836 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4837 if (compute_eigs) { 4838 PetscReal lambda_max_s,lambda_min_s; 4839 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4840 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4841 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4842 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4843 for (i=0;i<neigs;i++) { 4844 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4845 } 4846 } 4847 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4848 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4849 } 4850 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4851 if (compute_eigs) { 4852 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4853 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4854 } 4855 } 4856 } 4857 /* print additional info */ 4858 if (pcbddc->dbg_flag) { 4859 /* waits until all processes reaches this point */ 4860 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4861 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4862 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4863 } 4864 4865 /* free memory */ 4866 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4867 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4868 PetscFunctionReturn(0); 4869 } 4870 4871 #undef __FUNCT__ 4872 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4873 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4874 { 4875 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4876 PC_IS* pcis = (PC_IS*)pc->data; 4877 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4878 IS subset,subset_mult,subset_n; 4879 PetscInt local_size,coarse_size=0; 4880 PetscInt *local_primal_indices=NULL; 4881 const PetscInt *t_local_primal_indices; 4882 PetscErrorCode ierr; 4883 4884 PetscFunctionBegin; 4885 /* Compute global number of coarse dofs */ 4886 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4887 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4888 } 4889 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4890 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4891 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4892 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4893 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4894 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4895 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4896 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4897 if (local_size != pcbddc->local_primal_size) { 4898 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4899 } 4900 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4901 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4902 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4903 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4904 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4905 4906 /* check numbering */ 4907 if (pcbddc->dbg_flag) { 4908 PetscScalar coarsesum,*array,*array2; 4909 PetscInt i; 4910 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4911 4912 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4913 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4914 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4915 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4916 /* counter */ 4917 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4918 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4919 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4920 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4921 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4922 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4923 4924 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4925 for (i=0;i<pcbddc->local_primal_size;i++) { 4926 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4927 } 4928 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4929 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4930 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4931 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4932 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4933 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4934 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4935 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4936 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4937 for (i=0;i<pcis->n;i++) { 4938 if (array[i] != 0.0 && array[i] != array2[i]) { 4939 PetscInt owned = (PetscInt)(array[i]); 4940 PetscInt neigh = (PetscInt)(array2[i]); 4941 set_error = PETSC_TRUE; 4942 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4943 } 4944 } 4945 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4946 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4947 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4948 for (i=0;i<pcis->n;i++) { 4949 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4950 } 4951 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4952 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4953 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4954 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4955 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4956 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4957 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4958 PetscInt *gidxs; 4959 4960 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4961 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4962 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4963 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4964 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4965 for (i=0;i<pcbddc->local_primal_size;i++) { 4966 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4967 } 4968 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4969 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4970 } 4971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4972 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4973 } 4974 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4975 /* get back data */ 4976 *coarse_size_n = coarse_size; 4977 *local_primal_indices_n = local_primal_indices; 4978 PetscFunctionReturn(0); 4979 } 4980 4981 #undef __FUNCT__ 4982 #define __FUNCT__ "PCBDDCGlobalToLocal" 4983 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4984 { 4985 IS localis_t; 4986 PetscInt i,lsize,*idxs,n; 4987 PetscScalar *vals; 4988 PetscErrorCode ierr; 4989 4990 PetscFunctionBegin; 4991 /* get indices in local ordering exploiting local to global map */ 4992 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4993 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4994 for (i=0;i<lsize;i++) vals[i] = 1.0; 4995 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4996 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4997 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4998 if (idxs) { /* multilevel guard */ 4999 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5000 } 5001 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5002 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5003 ierr = PetscFree(vals);CHKERRQ(ierr); 5004 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5005 /* now compute set in local ordering */ 5006 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5007 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5008 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5009 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5010 for (i=0,lsize=0;i<n;i++) { 5011 if (PetscRealPart(vals[i]) > 0.5) { 5012 lsize++; 5013 } 5014 } 5015 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5016 for (i=0,lsize=0;i<n;i++) { 5017 if (PetscRealPart(vals[i]) > 0.5) { 5018 idxs[lsize++] = i; 5019 } 5020 } 5021 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5022 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5023 *localis = localis_t; 5024 PetscFunctionReturn(0); 5025 } 5026 5027 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5028 #undef __FUNCT__ 5029 #define __FUNCT__ "PCBDDCMatMult_Private" 5030 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5031 { 5032 PCBDDCChange_ctx change_ctx; 5033 PetscErrorCode ierr; 5034 5035 PetscFunctionBegin; 5036 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5037 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5038 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5039 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5040 PetscFunctionReturn(0); 5041 } 5042 5043 #undef __FUNCT__ 5044 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5045 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5046 { 5047 PCBDDCChange_ctx change_ctx; 5048 PetscErrorCode ierr; 5049 5050 PetscFunctionBegin; 5051 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5052 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5053 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5054 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5055 PetscFunctionReturn(0); 5056 } 5057 5058 #undef __FUNCT__ 5059 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5060 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5061 { 5062 PC_IS *pcis=(PC_IS*)pc->data; 5063 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5064 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5065 Mat S_j; 5066 PetscInt *used_xadj,*used_adjncy; 5067 PetscBool free_used_adj; 5068 PetscErrorCode ierr; 5069 5070 PetscFunctionBegin; 5071 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5072 free_used_adj = PETSC_FALSE; 5073 if (pcbddc->sub_schurs_layers == -1) { 5074 used_xadj = NULL; 5075 used_adjncy = NULL; 5076 } else { 5077 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5078 used_xadj = pcbddc->mat_graph->xadj; 5079 used_adjncy = pcbddc->mat_graph->adjncy; 5080 } else if (pcbddc->computed_rowadj) { 5081 used_xadj = pcbddc->mat_graph->xadj; 5082 used_adjncy = pcbddc->mat_graph->adjncy; 5083 } else { 5084 PetscBool flg_row=PETSC_FALSE; 5085 const PetscInt *xadj,*adjncy; 5086 PetscInt nvtxs; 5087 5088 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5089 if (flg_row) { 5090 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5091 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5092 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5093 free_used_adj = PETSC_TRUE; 5094 } else { 5095 pcbddc->sub_schurs_layers = -1; 5096 used_xadj = NULL; 5097 used_adjncy = NULL; 5098 } 5099 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5100 } 5101 } 5102 5103 /* setup sub_schurs data */ 5104 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5105 if (!sub_schurs->use_mumps) { 5106 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5107 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5108 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5109 } else { 5110 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5111 PetscBool isseqaij; 5112 if (!pcbddc->use_vertices && reuse_solvers) { 5113 PetscInt n_vertices; 5114 5115 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5116 reuse_solvers = (PetscBool)!n_vertices; 5117 } 5118 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5119 if (!isseqaij) { 5120 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5121 if (matis->A == pcbddc->local_mat) { 5122 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5123 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5124 } else { 5125 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5126 } 5127 } 5128 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5129 } 5130 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5131 5132 /* free adjacency */ 5133 if (free_used_adj) { 5134 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5135 } 5136 PetscFunctionReturn(0); 5137 } 5138 5139 #undef __FUNCT__ 5140 #define __FUNCT__ "PCBDDCInitSubSchurs" 5141 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5142 { 5143 PC_IS *pcis=(PC_IS*)pc->data; 5144 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5145 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5146 PCBDDCGraph graph; 5147 PetscErrorCode ierr; 5148 5149 PetscFunctionBegin; 5150 /* attach interface graph for determining subsets */ 5151 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5152 IS verticesIS,verticescomm; 5153 PetscInt vsize,*idxs; 5154 5155 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5156 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5157 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5158 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5159 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5160 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5161 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5162 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5163 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5164 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5165 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5166 /* 5167 if (pcbddc->dbg_flag) { 5168 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5169 } 5170 */ 5171 } else { 5172 graph = pcbddc->mat_graph; 5173 } 5174 5175 /* sub_schurs init */ 5176 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5177 5178 /* free graph struct */ 5179 if (pcbddc->sub_schurs_rebuild) { 5180 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5181 } 5182 PetscFunctionReturn(0); 5183 } 5184