1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0" 10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 if (!pcbddc->benign_sf) { 17 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 18 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 19 } 20 if (pop) { /* use SF to get values */ 21 PetscScalar *array; 22 23 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 24 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 25 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 26 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 27 } else { /* use VecSetValue */ 28 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 29 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 30 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNCT__ 36 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 37 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 38 { 39 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 40 PetscErrorCode ierr; 41 42 PetscFunctionBegin; 43 /* TODO: add error checking 44 - avoid nested pop (or push) calls. 45 - cannot push before pop. 46 */ 47 if (!pcbddc->zerodiag) { 48 PetscFunctionReturn(0); 49 } 50 if (pop) { 51 const PetscInt *cB0_cols; 52 PetscInt cB0_ncol; 53 const PetscScalar *cB0_vals; 54 55 /* extract B_0 */ 56 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 57 pcbddc->B0_ncol = cB0_ncol; 58 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 59 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 60 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 61 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 62 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 63 /* remove rows and cols from local problem */ 64 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 65 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 66 } else { /* push */ 67 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 68 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 69 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 70 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 71 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 72 } 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "PCBDDCAdaptiveSelection" 78 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 79 { 80 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 81 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 82 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 83 PetscBLASInt *B_iwork,*B_ifail; 84 PetscScalar *work,lwork; 85 PetscScalar *St,*S,*eigv; 86 PetscScalar *Sarray,*Starray; 87 PetscReal *eigs,thresh; 88 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 89 PetscBool allocated_S_St; 90 #if defined(PETSC_USE_COMPLEX) 91 PetscReal *rwork; 92 #endif 93 PetscErrorCode ierr; 94 95 PetscFunctionBegin; 96 if (!sub_schurs->use_mumps) { 97 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 98 } 99 100 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 101 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 102 } 103 104 if (pcbddc->dbg_flag) { 105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 106 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 107 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 108 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 109 } 110 111 if (pcbddc->dbg_flag) { 112 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 113 } 114 115 /* max size of subsets */ 116 mss = 0; 117 for (i=0;i<sub_schurs->n_subs;i++) { 118 PetscInt subset_size; 119 120 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 121 mss = PetscMax(mss,subset_size); 122 } 123 124 /* min/max and threshold */ 125 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 126 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 127 nmax = PetscMax(nmin,nmax); 128 allocated_S_St = PETSC_FALSE; 129 if (nmin) { 130 allocated_S_St = PETSC_TRUE; 131 } 132 133 /* allocate lapack workspace */ 134 cum = cum2 = 0; 135 maxneigs = 0; 136 for (i=0;i<sub_schurs->n_subs;i++) { 137 PetscInt n,subset_size; 138 139 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 140 n = PetscMin(subset_size,nmax); 141 cum += subset_size; 142 cum2 += subset_size*n; 143 maxneigs = PetscMax(maxneigs,n); 144 } 145 if (mss) { 146 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 147 PetscBLASInt B_itype = 1; 148 PetscBLASInt B_N = mss; 149 PetscReal zero = 0.0; 150 PetscReal eps = 0.0; /* dlamch? */ 151 152 B_lwork = -1; 153 S = NULL; 154 St = NULL; 155 eigs = NULL; 156 eigv = NULL; 157 B_iwork = NULL; 158 B_ifail = NULL; 159 #if defined(PETSC_USE_COMPLEX) 160 rwork = NULL; 161 #endif 162 thresh = 1.0; 163 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 164 #if defined(PETSC_USE_COMPLEX) 165 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 166 #else 167 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 168 #endif 169 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 170 ierr = PetscFPTrapPop();CHKERRQ(ierr); 171 } else { 172 /* TODO */ 173 } 174 } else { 175 lwork = 0; 176 } 177 178 nv = 0; 179 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 180 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 181 } 182 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 183 if (allocated_S_St) { 184 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 185 } 186 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 187 #if defined(PETSC_USE_COMPLEX) 188 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 189 #endif 190 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 191 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 192 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 193 nv+cum,&pcbddc->adaptive_constraints_idxs, 194 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 195 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 196 197 maxneigs = 0; 198 cum = cum2 = cumarray = 0; 199 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 200 pcbddc->adaptive_constraints_data_ptr[0] = 0; 201 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 202 const PetscInt *idxs; 203 204 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 205 for (cum=0;cum<nv;cum++) { 206 pcbddc->adaptive_constraints_n[cum] = 1; 207 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 208 pcbddc->adaptive_constraints_data[cum] = 1.0; 209 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 210 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 211 } 212 cum2 = cum; 213 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 214 } 215 216 if (mss) { /* multilevel */ 217 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 218 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 219 } 220 221 for (i=0;i<sub_schurs->n_subs;i++) { 222 223 const PetscInt *idxs; 224 PetscReal infty = PETSC_MAX_REAL; 225 PetscInt j,subset_size,eigs_start = 0; 226 PetscBLASInt B_N; 227 PetscBool same_data = PETSC_FALSE; 228 229 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 230 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 231 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 232 if (sub_schurs->is_hermitian) { 233 PetscInt j,k; 234 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 235 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 236 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 for (j=0;j<subset_size;j++) { 239 for (k=j;k<subset_size;k++) { 240 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 241 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 242 } 243 } 244 } else { 245 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 246 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 247 } 248 } else { 249 S = Sarray + cumarray; 250 St = Starray + cumarray; 251 } 252 253 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 254 /* see if we can save some work */ 255 if (sub_schurs->n_subs == 1) { 256 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 257 } 258 259 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 260 B_neigs = 0; 261 } else { 262 /* Threshold: this is an heuristic for edges */ 263 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 264 265 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 266 PetscBLASInt B_itype = 1; 267 PetscBLASInt B_IL, B_IU; 268 PetscReal eps = -1.0; /* dlamch? */ 269 PetscInt nmin_s; 270 271 /* ask for eigenvalues larger than thresh */ 272 if (pcbddc->dbg_flag) { 273 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 274 } 275 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 276 #if defined(PETSC_USE_COMPLEX) 277 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 278 #else 279 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 280 #endif 281 ierr = PetscFPTrapPop();CHKERRQ(ierr); 282 if (B_ierr) { 283 if (B_ierr < 0 ) { 284 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 285 } else if (B_ierr <= B_N) { 286 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 287 } else { 288 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 289 } 290 } 291 292 if (B_neigs > nmax) { 293 if (pcbddc->dbg_flag) { 294 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 295 } 296 eigs_start = B_neigs -nmax; 297 B_neigs = nmax; 298 } 299 300 nmin_s = PetscMin(nmin,B_N); 301 if (B_neigs < nmin_s) { 302 PetscBLASInt B_neigs2; 303 304 B_IU = B_N - B_neigs; 305 B_IL = B_N - nmin_s + 1; 306 if (pcbddc->dbg_flag) { 307 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 308 } 309 if (sub_schurs->is_hermitian) { 310 PetscInt j; 311 for (j=0;j<subset_size;j++) { 312 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 313 } 314 for (j=0;j<subset_size;j++) { 315 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 316 } 317 } else { 318 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 319 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 320 } 321 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 322 #if defined(PETSC_USE_COMPLEX) 323 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 324 #else 325 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 326 #endif 327 ierr = PetscFPTrapPop();CHKERRQ(ierr); 328 B_neigs += B_neigs2; 329 } 330 if (B_ierr) { 331 if (B_ierr < 0 ) { 332 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 333 } else if (B_ierr <= B_N) { 334 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 335 } else { 336 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 337 } 338 } 339 if (pcbddc->dbg_flag) { 340 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 341 for (j=0;j<B_neigs;j++) { 342 if (eigs[j] == 0.0) { 343 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 344 } else { 345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 346 } 347 } 348 } 349 } else { 350 /* TODO */ 351 } 352 } 353 maxneigs = PetscMax(B_neigs,maxneigs); 354 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 355 if (B_neigs) { 356 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 357 358 if (pcbddc->dbg_flag > 1) { 359 PetscInt ii; 360 for (ii=0;ii<B_neigs;ii++) { 361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 362 for (j=0;j<B_N;j++) { 363 #if defined(PETSC_USE_COMPLEX) 364 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 365 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 366 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 367 #else 368 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 369 #endif 370 } 371 } 372 } 373 #if 0 374 for (j=0;j<B_neigs;j++) { 375 PetscBLASInt Blas_N,Blas_one = 1.0; 376 PetscScalar norm; 377 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 378 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 379 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 380 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 381 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 382 } else { 383 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 384 } 385 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 386 } 387 #endif 388 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 389 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 390 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 391 cum++; 392 } 393 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 394 /* shift for next computation */ 395 cumarray += subset_size*subset_size; 396 } 397 if (pcbddc->dbg_flag) { 398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 399 } 400 401 if (mss) { 402 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 403 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 404 /* destroy matrices (junk) */ 405 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 406 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 407 } 408 if (allocated_S_St) { 409 ierr = PetscFree2(S,St);CHKERRQ(ierr); 410 } 411 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 412 #if defined(PETSC_USE_COMPLEX) 413 ierr = PetscFree(rwork);CHKERRQ(ierr); 414 #endif 415 if (pcbddc->dbg_flag) { 416 PetscInt maxneigs_r; 417 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 418 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 419 } 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "PCBDDCSetUpSolvers" 425 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 426 { 427 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 428 PetscScalar *coarse_submat_vals; 429 PetscErrorCode ierr; 430 431 PetscFunctionBegin; 432 /* Setup local scatters R_to_B and (optionally) R_to_D */ 433 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 434 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 435 436 /* Setup local neumann solver ksp_R */ 437 /* PCBDDCSetUpLocalScatters should be called first! */ 438 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 439 440 /* Change global null space passed in by the user if change of basis has been requested */ 441 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 442 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 443 } 444 445 /* 446 Setup local correction and local part of coarse basis. 447 Gives back the dense local part of the coarse matrix in column major ordering 448 */ 449 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 450 451 /* Compute total number of coarse nodes and setup coarse solver */ 452 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 453 454 /* free */ 455 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "PCBDDCResetCustomization" 461 PetscErrorCode PCBDDCResetCustomization(PC pc) 462 { 463 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 464 PetscErrorCode ierr; 465 466 PetscFunctionBegin; 467 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 468 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 469 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 470 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 471 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 472 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 473 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 474 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 475 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 476 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 477 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNCT__ 482 #define __FUNCT__ "PCBDDCResetTopography" 483 PetscErrorCode PCBDDCResetTopography(PC pc) 484 { 485 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 490 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 491 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 492 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 493 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "PCBDDCResetSolvers" 499 PetscErrorCode PCBDDCResetSolvers(PC pc) 500 { 501 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 502 PetscScalar *array; 503 PetscErrorCode ierr; 504 505 PetscFunctionBegin; 506 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 507 if (pcbddc->coarse_phi_B) { 508 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 509 ierr = PetscFree(array);CHKERRQ(ierr); 510 } 511 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 512 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 513 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 514 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 515 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 516 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 517 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 518 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 519 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 520 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 521 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 522 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 523 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 524 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 525 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 526 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 527 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 528 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 529 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 530 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 531 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 532 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 533 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 534 ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr); 535 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 536 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 537 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 538 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 539 PetscFunctionReturn(0); 540 } 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 544 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 545 { 546 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 547 PC_IS *pcis = (PC_IS*)pc->data; 548 VecType impVecType; 549 PetscInt n_constraints,n_R,old_size,n_benign; 550 PetscErrorCode ierr; 551 552 PetscFunctionBegin; 553 if (!pcbddc->ConstraintMatrix) { 554 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 555 } 556 /* get sizes */ 557 n_benign = (PetscInt)(!!pcbddc->zerodiag); 558 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 559 if (n_benign && n_constraints) { 560 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis"); 561 } 562 n_R = pcis->n - pcbddc->n_vertices; 563 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 564 /* local work vectors (try to avoid unneeded work)*/ 565 /* R nodes */ 566 old_size = -1; 567 if (pcbddc->vec1_R) { 568 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 569 } 570 if (n_R != old_size) { 571 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 572 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 573 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 574 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 575 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 576 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 577 } 578 /* local primal dofs */ 579 old_size = -1; 580 if (pcbddc->vec1_P) { 581 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 582 } 583 if (pcbddc->local_primal_size != old_size) { 584 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 585 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 586 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 587 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 588 } 589 /* local explicit constraints */ 590 old_size = -1; 591 if (pcbddc->vec1_C) { 592 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 593 } 594 if (n_constraints && n_constraints != old_size) { 595 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 596 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 597 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 598 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 599 } 600 PetscFunctionReturn(0); 601 } 602 603 #undef __FUNCT__ 604 #define __FUNCT__ "PCBDDCSetUpCorrection" 605 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 606 { 607 PetscErrorCode ierr; 608 /* pointers to pcis and pcbddc */ 609 PC_IS* pcis = (PC_IS*)pc->data; 610 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 611 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 612 /* submatrices of local problem */ 613 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 614 /* submatrices of benign trick */ 615 Mat B0_R,B0_V; 616 /* submatrices of local coarse problem */ 617 Mat S_VV,S_CV,S_VC,S_CC; 618 /* working matrices */ 619 Mat C_CR; 620 /* additional working stuff */ 621 PC pc_R; 622 Mat F; 623 Vec vec_R_benign; 624 PetscBool isLU,isCHOL,isILU; 625 626 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 627 PetscScalar *work; 628 PetscInt *idx_V_B; 629 PetscInt n,n_vertices,n_constraints,n_benign; 630 PetscInt i,n_R,n_D,n_B; 631 PetscBool unsymmetric_check; 632 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 633 MatType impMatType; 634 /* some shortcuts to scalars */ 635 PetscScalar one=1.0,m_one=-1.0; 636 637 PetscFunctionBegin; 638 n_benign = (PetscInt)(!!pcbddc->zerodiag); 639 n_vertices = pcbddc->n_vertices; 640 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 641 /* Set Non-overlapping dimensions */ 642 n_B = pcis->n_B; 643 n_D = pcis->n - n_B; 644 n_R = pcis->n - n_vertices; 645 646 /* Set types for local objects needed by BDDC precondtioner */ 647 impMatType = MATSEQDENSE; 648 649 /* vertices in boundary numbering */ 650 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 651 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 652 if (i != n_vertices) { 653 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 654 } 655 656 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 657 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 658 if (!n_benign) { 659 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 660 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 662 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 663 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 664 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 665 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 666 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 667 } else { /* these wrappers are not needed with the benign trick */ 668 S_VV = S_CV = S_VC = S_CC = NULL; 669 } 670 671 unsymmetric_check = PETSC_FALSE; 672 /* allocate workspace */ 673 n = 0; 674 if (n_constraints) { 675 n += n_R*n_constraints; 676 } 677 if (n_vertices) { 678 n = PetscMax(2*n_R*n_vertices,n); 679 n = PetscMax((n_R+n_B)*n_vertices,n); 680 } 681 if (!pcbddc->symmetric_primal) { 682 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 683 unsymmetric_check = PETSC_TRUE; 684 } 685 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 686 687 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 688 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 689 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 690 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 691 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 692 if (isLU || isILU || isCHOL) { 693 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 694 } else if (sub_schurs->reuse_mumps) { 695 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 696 MatFactorType type; 697 698 F = reuse_mumps->F; 699 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 700 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 701 } else { 702 F = NULL; 703 } 704 705 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 706 if (n_constraints) { 707 Mat M1,M2,M3; 708 Mat auxmat; 709 IS is_aux; 710 PetscScalar *array,*array2; 711 712 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 713 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 714 715 /* Extract constraints on R nodes: C_{CR} */ 716 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 717 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 718 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 719 720 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 721 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 722 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 723 for (i=0;i<n_constraints;i++) { 724 const PetscScalar *row_cmat_values; 725 const PetscInt *row_cmat_indices; 726 PetscInt size_of_constraint,j; 727 728 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 729 for (j=0;j<size_of_constraint;j++) { 730 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 731 } 732 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 733 } 734 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 735 if (F) { 736 Mat B; 737 738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 739 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 740 ierr = MatDestroy(&B);CHKERRQ(ierr); 741 } else { 742 PetscScalar *marr; 743 744 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 745 for (i=0;i<n_constraints;i++) { 746 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 747 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 748 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 749 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 750 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 751 } 752 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 753 } 754 if (!pcbddc->switch_static) { 755 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 756 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 757 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 758 for (i=0;i<n_constraints;i++) { 759 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 760 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 761 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 762 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 763 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 764 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 765 } 766 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 767 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 768 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 769 } else { 770 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 771 pcbddc->local_auxmat2 = local_auxmat2_R; 772 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 773 } 774 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 775 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 776 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 777 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 778 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 779 if (isCHOL) { 780 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 781 } else { 782 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 783 } 784 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 785 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 786 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 787 ierr = MatDestroy(&M2);CHKERRQ(ierr); 788 ierr = MatDestroy(&M3);CHKERRQ(ierr); 789 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 790 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 791 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 792 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 793 ierr = MatDestroy(&M1);CHKERRQ(ierr); 794 } 795 /* Get submatrices from subdomain matrix */ 796 if (n_vertices) { 797 IS is_aux; 798 799 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 800 IS tis; 801 802 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 803 ierr = ISSort(tis);CHKERRQ(ierr); 804 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 805 ierr = ISDestroy(&tis);CHKERRQ(ierr); 806 } else { 807 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 808 } 809 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 810 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 811 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 812 if (n_benign) { 813 Mat B0; 814 IS dummy; 815 PetscInt ii[2]; 816 817 ii[0] = 0; 818 ii[1] = pcbddc->B0_ncol; 819 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 820 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 821 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 822 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 823 ierr = MatDestroy(&B0);CHKERRQ(ierr); 824 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 825 } 826 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 827 } 828 829 /* Matrix of coarse basis functions (local) */ 830 if (pcbddc->coarse_phi_B) { 831 PetscInt on_B,on_primal,on_D=n_D; 832 if (pcbddc->coarse_phi_D) { 833 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 834 } 835 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 836 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 837 PetscScalar *marray; 838 839 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 840 ierr = PetscFree(marray);CHKERRQ(ierr); 841 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 842 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 843 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 844 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 845 } 846 } 847 848 if (!pcbddc->coarse_phi_B) { 849 PetscScalar *marray; 850 851 n = n_B*pcbddc->local_primal_size; 852 if (pcbddc->switch_static || pcbddc->dbg_flag) { 853 n += n_D*pcbddc->local_primal_size; 854 } 855 if (!pcbddc->symmetric_primal) { 856 n *= 2; 857 } 858 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 859 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 860 n = n_B*pcbddc->local_primal_size; 861 if (pcbddc->switch_static || pcbddc->dbg_flag) { 862 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 863 n += n_D*pcbddc->local_primal_size; 864 } 865 if (!pcbddc->symmetric_primal) { 866 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 867 if (pcbddc->switch_static || pcbddc->dbg_flag) { 868 n = n_B*pcbddc->local_primal_size; 869 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 870 } 871 } else { 872 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 873 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 874 if (pcbddc->switch_static || pcbddc->dbg_flag) { 875 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 876 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 877 } 878 } 879 } 880 881 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 882 /* vertices */ 883 if (n_vertices) { 884 885 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 886 887 if (n_R) { 888 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 889 Mat S_V_benign; 890 PetscBLASInt B_N,B_one = 1; 891 PetscScalar *x,*y; 892 PetscBool isseqaij; 893 894 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 895 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 896 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 897 if (F) { /* TODO could be optimized for symmetric problems */ 898 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 899 } else { 900 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 901 for (i=0;i<n_vertices;i++) { 902 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 903 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 904 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 905 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 906 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 907 } 908 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 909 } 910 if (n_benign) { 911 const PetscScalar *vals; 912 const PetscInt *idxs; 913 PetscInt n; 914 915 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr); 916 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 917 ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 918 ierr = VecSetValues(pcbddc->vec1_R,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 919 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 920 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 921 ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 922 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,vec_R_benign);CHKERRQ(ierr); 923 ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr); 924 } 925 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 926 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 927 if (n_constraints) { 928 Mat B; 929 930 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 931 for (i=0;i<n_vertices;i++) { 932 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 933 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 934 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 935 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 937 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 938 } 939 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 940 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 941 ierr = MatDestroy(&B);CHKERRQ(ierr); 942 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 943 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 944 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 945 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 946 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 947 ierr = MatDestroy(&B);CHKERRQ(ierr); 948 } 949 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 950 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 951 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 952 } 953 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 954 if (n_benign) { 955 ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr); 956 } 957 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 958 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 959 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 960 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 961 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 962 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 963 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 964 if (!n_benign) { 965 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 966 } else { 967 const PetscScalar *vals; 968 PetscScalar val; 969 const PetscInt *idxs; 970 PetscInt n,j; 971 972 /* Use explicit loops to populate the local coarse matrix */ 973 ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr); 974 for (j=0;j<n_vertices;j++) { 975 for (i=0;i<n_vertices;i++) { 976 coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i]; 977 } 978 } 979 ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr); 980 ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr); 981 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 982 for (j=0;j<n_vertices;j++) { 983 coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j]; 984 coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j]; 985 } 986 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 987 ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr); 988 ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr); 989 ierr = VecDot(vec_R_benign,vec_R_benign,&val);CHKERRQ(ierr); 990 coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = -val; 991 } 992 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 993 } else { 994 /* TODO benign case also here */ 995 if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented"); 996 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 997 } 998 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 999 /* coarse basis functions */ 1000 for (i=0;i<n_vertices;i++) { 1001 PetscScalar *y; 1002 1003 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1004 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1005 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1006 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 y[n_B*i+idx_V_B[i]] = 1.0; 1009 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1010 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1011 1012 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1013 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1014 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1015 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1016 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1017 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1018 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1019 } 1020 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1021 } 1022 if (n_benign) { 1023 PetscScalar *y; 1024 1025 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1026 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr); 1027 ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1030 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1031 1032 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1033 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1034 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1035 ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1038 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1039 } 1040 } 1041 /* if n_R == 0 the object is not destroyed */ 1042 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1043 } 1044 1045 if (n_constraints) { 1046 Mat B; 1047 1048 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1049 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1050 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1051 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1052 if (n_vertices) { 1053 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1054 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1055 } else { 1056 Mat S_VCt; 1057 1058 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1059 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1060 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1061 } 1062 } 1063 ierr = MatDestroy(&B);CHKERRQ(ierr); 1064 /* coarse basis functions */ 1065 for (i=0;i<n_constraints;i++) { 1066 PetscScalar *y; 1067 1068 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1069 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1070 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1071 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1072 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1073 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1074 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1075 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1076 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1077 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1078 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1079 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1080 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1081 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1082 } 1083 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1084 } 1085 } 1086 if (n_constraints) { 1087 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1088 } 1089 1090 if (n_benign) { 1091 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1092 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1093 ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr); 1094 } 1095 1096 /* compute other basis functions for non-symmetric problems */ 1097 if (!pcbddc->symmetric_primal) { 1098 1099 if (n_constraints) { 1100 Mat S_CCT,B_C; 1101 1102 /* this is a lazy thing */ 1103 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1105 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1106 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1107 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1108 if (n_vertices) { 1109 Mat B_V,S_VCT; 1110 1111 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1112 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1113 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1114 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1115 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1116 } 1117 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1118 } else { /* if there are no constraints, reset work */ 1119 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1120 } 1121 if (n_vertices && n_R) { 1122 Mat A_VRT; 1123 PetscScalar *marray; 1124 PetscBLASInt B_N,B_one = 1; 1125 1126 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1127 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1128 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1129 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1130 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1131 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1132 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1133 } 1134 1135 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1136 for (i=0;i<pcbddc->local_primal_size;i++) { 1137 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1138 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1139 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1140 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1141 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1142 } 1143 } else { 1144 for (i=0;i<pcbddc->local_primal_size;i++) { 1145 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1146 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1147 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1148 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1149 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1150 } 1151 } 1152 /* coarse basis functions */ 1153 for (i=0;i<pcbddc->local_primal_size;i++) { 1154 PetscScalar *y; 1155 1156 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1157 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1158 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1159 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1160 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1161 if (i<n_vertices) { 1162 y[n_B*i+idx_V_B[i]] = 1.0; 1163 } 1164 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1165 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1166 1167 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1168 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1169 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1170 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1171 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1172 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1173 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1174 } 1175 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1176 } 1177 } 1178 /* free memory */ 1179 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1180 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1181 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1182 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1183 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1184 ierr = PetscFree(work);CHKERRQ(ierr); 1185 if (n_vertices) { 1186 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1187 } 1188 if (n_constraints) { 1189 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1190 } 1191 /* Checking coarse_sub_mat and coarse basis functios */ 1192 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1193 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1194 //if (pcbddc->dbg_flag) { 1195 if (0) { 1196 Mat coarse_sub_mat; 1197 Mat AUXMAT,TM1,TM2,TM3,TM4; 1198 Mat coarse_phi_D,coarse_phi_B; 1199 Mat coarse_psi_D,coarse_psi_B; 1200 Mat A_II,A_BB,A_IB,A_BI; 1201 Mat C_B,CPHI; 1202 IS is_dummy; 1203 Vec mones; 1204 MatType checkmattype=MATSEQAIJ; 1205 PetscReal real_value; 1206 1207 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1208 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1209 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1210 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1211 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1212 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1213 if (unsymmetric_check) { 1214 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1215 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1216 } 1217 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1218 1219 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1220 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1221 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1222 if (unsymmetric_check) { 1223 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1224 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1225 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1226 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1227 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1228 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1229 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1230 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1231 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1232 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1233 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1234 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1235 } else { 1236 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1237 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1238 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1239 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1240 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1241 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1242 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1243 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1244 } 1245 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1246 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1247 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1248 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1249 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1250 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1251 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1252 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1253 1254 /* check constraints */ 1255 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1256 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1257 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1258 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1259 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1260 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1261 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1263 if (unsymmetric_check) { 1264 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1265 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1266 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1267 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1268 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1269 } 1270 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1271 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1272 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1273 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1274 1275 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1276 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1277 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1278 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1279 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1280 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1281 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1282 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1283 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1284 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1285 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1286 if (unsymmetric_check) { 1287 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1288 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1289 } 1290 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1291 } 1292 /* get back data */ 1293 *coarse_submat_vals_n = coarse_submat_vals; 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1299 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1300 { 1301 Mat *work_mat; 1302 IS isrow_s,iscol_s; 1303 PetscBool rsorted,csorted; 1304 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1309 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1310 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1311 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1312 1313 if (!rsorted) { 1314 const PetscInt *idxs; 1315 PetscInt *idxs_sorted,i; 1316 1317 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1318 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1319 for (i=0;i<rsize;i++) { 1320 idxs_perm_r[i] = i; 1321 } 1322 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1323 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1324 for (i=0;i<rsize;i++) { 1325 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1326 } 1327 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1328 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1329 } else { 1330 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1331 isrow_s = isrow; 1332 } 1333 1334 if (!csorted) { 1335 if (isrow == iscol) { 1336 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1337 iscol_s = isrow_s; 1338 } else { 1339 const PetscInt *idxs; 1340 PetscInt *idxs_sorted,i; 1341 1342 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1343 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1344 for (i=0;i<csize;i++) { 1345 idxs_perm_c[i] = i; 1346 } 1347 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1348 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1349 for (i=0;i<csize;i++) { 1350 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1351 } 1352 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1353 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1354 } 1355 } else { 1356 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1357 iscol_s = iscol; 1358 } 1359 1360 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1361 1362 if (!rsorted || !csorted) { 1363 Mat new_mat; 1364 IS is_perm_r,is_perm_c; 1365 1366 if (!rsorted) { 1367 PetscInt *idxs_r,i; 1368 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1369 for (i=0;i<rsize;i++) { 1370 idxs_r[idxs_perm_r[i]] = i; 1371 } 1372 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1373 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1374 } else { 1375 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1376 } 1377 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1378 1379 if (!csorted) { 1380 if (isrow_s == iscol_s) { 1381 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1382 is_perm_c = is_perm_r; 1383 } else { 1384 PetscInt *idxs_c,i; 1385 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1386 for (i=0;i<csize;i++) { 1387 idxs_c[idxs_perm_c[i]] = i; 1388 } 1389 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1390 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1391 } 1392 } else { 1393 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1394 } 1395 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1396 1397 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1398 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1399 work_mat[0] = new_mat; 1400 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1401 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1402 } 1403 1404 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1405 *B = work_mat[0]; 1406 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1407 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1408 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1414 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1415 { 1416 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1417 PC_IS* pcis = (PC_IS*)pc->data; 1418 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1419 Mat new_mat; 1420 IS is_local,is_global; 1421 PetscInt local_size; 1422 PetscBool isseqaij; 1423 PetscErrorCode ierr; 1424 1425 PetscFunctionBegin; 1426 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1427 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1428 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1429 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1430 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1431 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1432 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1433 1434 /* check */ 1435 if (pcbddc->dbg_flag) { 1436 Vec x,x_change; 1437 PetscReal error; 1438 1439 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1440 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1441 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1442 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1443 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1444 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1445 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1446 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1447 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1448 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1449 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1450 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1451 ierr = VecDestroy(&x);CHKERRQ(ierr); 1452 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1453 } 1454 1455 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1456 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1457 if (isseqaij) { 1458 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1459 } else { 1460 Mat work_mat; 1461 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1462 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1463 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1464 } 1465 if (matis->A->symmetric_set) { 1466 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1467 #if !defined(PETSC_USE_COMPLEX) 1468 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1469 #endif 1470 } 1471 /* 1472 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1473 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1474 */ 1475 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1481 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1482 { 1483 PC_IS* pcis = (PC_IS*)(pc->data); 1484 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1485 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1486 PetscInt *idx_R_local=NULL; 1487 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1488 PetscInt vbs,bs; 1489 PetscBT bitmask=NULL; 1490 PetscErrorCode ierr; 1491 1492 PetscFunctionBegin; 1493 /* 1494 No need to setup local scatters if 1495 - primal space is unchanged 1496 AND 1497 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1498 AND 1499 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1500 */ 1501 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1502 PetscFunctionReturn(0); 1503 } 1504 /* destroy old objects */ 1505 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1506 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1507 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1508 /* Set Non-overlapping dimensions */ 1509 n_B = pcis->n_B; 1510 n_D = pcis->n - n_B; 1511 n_vertices = pcbddc->n_vertices; 1512 1513 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1514 1515 /* create auxiliary bitmask and allocate workspace */ 1516 if (!sub_schurs->reuse_mumps) { 1517 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1518 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1519 for (i=0;i<n_vertices;i++) { 1520 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1521 } 1522 1523 for (i=0, n_R=0; i<pcis->n; i++) { 1524 if (!PetscBTLookup(bitmask,i)) { 1525 idx_R_local[n_R++] = i; 1526 } 1527 } 1528 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1529 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1530 1531 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1532 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1533 } 1534 1535 /* Block code */ 1536 vbs = 1; 1537 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1538 if (bs>1 && !(n_vertices%bs)) { 1539 PetscBool is_blocked = PETSC_TRUE; 1540 PetscInt *vary; 1541 if (!sub_schurs->reuse_mumps) { 1542 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1543 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1544 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1545 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1546 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1547 for (i=0; i<pcis->n/bs; i++) { 1548 if (vary[i]!=0 && vary[i]!=bs) { 1549 is_blocked = PETSC_FALSE; 1550 break; 1551 } 1552 } 1553 ierr = PetscFree(vary);CHKERRQ(ierr); 1554 } else { 1555 /* Verify directly the R set */ 1556 for (i=0; i<n_R/bs; i++) { 1557 PetscInt j,node=idx_R_local[bs*i]; 1558 for (j=1; j<bs; j++) { 1559 if (node != idx_R_local[bs*i+j]-j) { 1560 is_blocked = PETSC_FALSE; 1561 break; 1562 } 1563 } 1564 } 1565 } 1566 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1567 vbs = bs; 1568 for (i=0;i<n_R/vbs;i++) { 1569 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1570 } 1571 } 1572 } 1573 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1574 if (sub_schurs->reuse_mumps) { 1575 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1576 1577 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1578 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1579 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1580 reuse_mumps->is_R = pcbddc->is_R_local; 1581 } else { 1582 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1583 } 1584 1585 /* print some info if requested */ 1586 if (pcbddc->dbg_flag) { 1587 PetscInt benign; 1588 1589 benign = (PetscInt)(!!pcbddc->zerodiag); 1590 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1591 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1592 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1593 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1594 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1595 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1596 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1597 } 1598 1599 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1600 if (!sub_schurs->reuse_mumps) { 1601 IS is_aux1,is_aux2; 1602 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1603 1604 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1605 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1606 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1607 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1608 for (i=0; i<n_D; i++) { 1609 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1610 } 1611 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1612 for (i=0, j=0; i<n_R; i++) { 1613 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1614 aux_array1[j++] = i; 1615 } 1616 } 1617 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1618 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1619 for (i=0, j=0; i<n_B; i++) { 1620 if (!PetscBTLookup(bitmask,is_indices[i])) { 1621 aux_array2[j++] = i; 1622 } 1623 } 1624 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1625 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1626 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1627 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1628 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1629 1630 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1631 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1632 for (i=0, j=0; i<n_R; i++) { 1633 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1634 aux_array1[j++] = i; 1635 } 1636 } 1637 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1638 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1639 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1640 } 1641 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1642 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1643 } else { 1644 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1645 IS tis; 1646 PetscInt schur_size; 1647 1648 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1649 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1650 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1651 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1652 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1653 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1654 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1655 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1656 } 1657 } 1658 PetscFunctionReturn(0); 1659 } 1660 1661 1662 #undef __FUNCT__ 1663 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1664 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1665 { 1666 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1667 PC_IS *pcis = (PC_IS*)pc->data; 1668 PC pc_temp; 1669 Mat A_RR; 1670 MatReuse reuse; 1671 PetscScalar m_one = -1.0; 1672 PetscReal value; 1673 PetscInt n_D,n_R; 1674 PetscBool use_exact,use_exact_reduced,issbaij; 1675 PetscErrorCode ierr; 1676 /* prefixes stuff */ 1677 char dir_prefix[256],neu_prefix[256],str_level[16]; 1678 size_t len; 1679 1680 PetscFunctionBegin; 1681 1682 /* compute prefixes */ 1683 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1684 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1685 if (!pcbddc->current_level) { 1686 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1687 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1688 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1689 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1690 } else { 1691 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1692 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1693 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1694 len -= 15; /* remove "pc_bddc_coarse_" */ 1695 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1696 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1697 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1698 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1699 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1700 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1701 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1702 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1703 } 1704 1705 /* DIRICHLET PROBLEM */ 1706 if (dirichlet) { 1707 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1708 if (pcbddc->local_mat->symmetric_set) { 1709 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1710 } 1711 /* Matrix for Dirichlet problem is pcis->A_II */ 1712 n_D = pcis->n - pcis->n_B; 1713 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1714 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1715 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1716 /* default */ 1717 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1718 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1719 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1720 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1721 if (issbaij) { 1722 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1723 } else { 1724 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1725 } 1726 /* Allow user's customization */ 1727 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1728 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1729 } 1730 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1731 if (sub_schurs->reuse_mumps) { 1732 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1733 1734 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1735 } 1736 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1737 if (!n_D) { 1738 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1739 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1740 } 1741 /* Set Up KSP for Dirichlet problem of BDDC */ 1742 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1743 /* set ksp_D into pcis data */ 1744 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1745 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1746 pcis->ksp_D = pcbddc->ksp_D; 1747 } 1748 1749 /* NEUMANN PROBLEM */ 1750 A_RR = 0; 1751 if (neumann) { 1752 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1753 PetscInt ibs,mbs; 1754 PetscBool issbaij; 1755 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1756 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1757 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1758 if (pcbddc->ksp_R) { /* already created ksp */ 1759 PetscInt nn_R; 1760 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1761 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1762 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1763 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1764 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1765 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1766 reuse = MAT_INITIAL_MATRIX; 1767 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1768 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1769 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1770 reuse = MAT_INITIAL_MATRIX; 1771 } else { /* safe to reuse the matrix */ 1772 reuse = MAT_REUSE_MATRIX; 1773 } 1774 } 1775 /* last check */ 1776 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1777 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1778 reuse = MAT_INITIAL_MATRIX; 1779 } 1780 } else { /* first time, so we need to create the matrix */ 1781 reuse = MAT_INITIAL_MATRIX; 1782 } 1783 /* extract A_RR */ 1784 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1785 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1786 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1787 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1788 if (matis->A == pcbddc->local_mat) { 1789 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1790 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1791 } else { 1792 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1793 } 1794 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1795 if (matis->A == pcbddc->local_mat) { 1796 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1797 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1798 } else { 1799 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1800 } 1801 } 1802 if (!sub_schurs->reuse_mumps) { 1803 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1804 if (pcbddc->local_mat->symmetric_set) { 1805 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1806 } 1807 } else { 1808 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1809 1810 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1811 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1812 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1813 } 1814 if (!pcbddc->ksp_R) { /* create object if not present */ 1815 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1816 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1817 /* default */ 1818 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1819 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1820 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1821 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1822 if (issbaij) { 1823 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1824 } else { 1825 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1826 } 1827 /* Allow user's customization */ 1828 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1829 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1830 } 1831 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1832 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1833 if (!n_R) { 1834 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1835 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1836 } 1837 /* Reuse MUMPS solver if it is present */ 1838 if (sub_schurs->reuse_mumps) { 1839 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1840 1841 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1842 } 1843 /* Set Up KSP for Neumann problem of BDDC */ 1844 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1845 } 1846 /* free Neumann problem's matrix */ 1847 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1848 1849 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1850 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1851 if (pcbddc->dbg_flag) { 1852 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1853 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1854 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1855 } 1856 if (dirichlet) { /* Dirichlet */ 1857 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1858 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1859 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1860 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1861 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1862 /* need to be adapted? */ 1863 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1864 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1865 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1866 /* print info */ 1867 if (pcbddc->dbg_flag) { 1868 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1869 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1870 } 1871 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1872 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1873 } 1874 } 1875 if (neumann) { /* Neumann */ 1876 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1877 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1878 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1879 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1880 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1881 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1882 /* need to be adapted? */ 1883 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1884 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1885 /* print info */ 1886 if (pcbddc->dbg_flag) { 1887 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1888 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1889 } 1890 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1891 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1892 } 1893 } 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1900 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1901 { 1902 PetscErrorCode ierr; 1903 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1904 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1905 1906 PetscFunctionBegin; 1907 if (!sub_schurs->reuse_mumps) { 1908 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1909 } 1910 if (!pcbddc->switch_static) { 1911 if (applytranspose && pcbddc->local_auxmat1) { 1912 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1913 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1914 } 1915 if (!sub_schurs->reuse_mumps) { 1916 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1917 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1918 } else { 1919 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1920 1921 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1922 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1923 } 1924 } else { 1925 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1926 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1927 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1928 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1929 if (applytranspose && pcbddc->local_auxmat1) { 1930 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1931 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1932 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1933 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1934 } 1935 } 1936 if (!sub_schurs->reuse_mumps) { 1937 if (applytranspose) { 1938 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1939 } else { 1940 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1941 } 1942 #if defined(PETSC_HAVE_MUMPS) 1943 } else { 1944 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1945 1946 if (applytranspose) { 1947 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1948 } else { 1949 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1950 } 1951 #endif 1952 } 1953 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1954 if (!pcbddc->switch_static) { 1955 if (!sub_schurs->reuse_mumps) { 1956 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1957 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1958 } else { 1959 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1960 1961 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1962 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1963 } 1964 if (!applytranspose && pcbddc->local_auxmat1) { 1965 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1966 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1967 } 1968 } else { 1969 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1970 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1971 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1972 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1973 if (!applytranspose && pcbddc->local_auxmat1) { 1974 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1975 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1976 } 1977 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1978 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1979 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1980 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1981 } 1982 PetscFunctionReturn(0); 1983 } 1984 1985 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1986 #undef __FUNCT__ 1987 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1988 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1989 { 1990 PetscErrorCode ierr; 1991 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1992 PC_IS* pcis = (PC_IS*) (pc->data); 1993 const PetscScalar zero = 0.0; 1994 1995 PetscFunctionBegin; 1996 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1997 if (applytranspose) { 1998 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1999 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2000 } else { 2001 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2002 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2003 } 2004 2005 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2006 if (pcbddc->benign_change) { 2007 PetscScalar *array; 2008 2009 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2010 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2011 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2012 } 2013 2014 /* start communications from local primal nodes to rhs of coarse solver */ 2015 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2016 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2017 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2018 2019 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2020 /* TODO remove null space when doing multilevel */ 2021 if (pcbddc->coarse_ksp) { 2022 Vec rhs,sol; 2023 2024 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2025 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2026 if (applytranspose) { 2027 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2028 } else { 2029 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2030 } 2031 } 2032 2033 /* Local solution on R nodes */ 2034 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2035 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2036 } 2037 2038 /* communications from coarse sol to local primal nodes */ 2039 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2040 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2041 2042 /* Sum contributions from two levels */ 2043 if (applytranspose) { 2044 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2045 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2046 } else { 2047 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2048 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2049 } 2050 /* store p0 */ 2051 if (pcbddc->benign_change) { 2052 PetscScalar *array; 2053 2054 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2055 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2056 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2057 } 2058 PetscFunctionReturn(0); 2059 } 2060 2061 #undef __FUNCT__ 2062 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2063 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2064 { 2065 PetscErrorCode ierr; 2066 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2067 PetscScalar *array; 2068 Vec from,to; 2069 2070 PetscFunctionBegin; 2071 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2072 from = pcbddc->coarse_vec; 2073 to = pcbddc->vec1_P; 2074 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2075 Vec tvec; 2076 2077 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2078 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2079 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2080 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2081 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2082 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2083 } 2084 } else { /* from local to global -> put data in coarse right hand side */ 2085 from = pcbddc->vec1_P; 2086 to = pcbddc->coarse_vec; 2087 } 2088 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2089 PetscFunctionReturn(0); 2090 } 2091 2092 #undef __FUNCT__ 2093 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2094 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2095 { 2096 PetscErrorCode ierr; 2097 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2098 PetscScalar *array; 2099 Vec from,to; 2100 2101 PetscFunctionBegin; 2102 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2103 from = pcbddc->coarse_vec; 2104 to = pcbddc->vec1_P; 2105 } else { /* from local to global -> put data in coarse right hand side */ 2106 from = pcbddc->vec1_P; 2107 to = pcbddc->coarse_vec; 2108 } 2109 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2110 if (smode == SCATTER_FORWARD) { 2111 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2112 Vec tvec; 2113 2114 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2115 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2116 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2117 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2118 } 2119 } else { 2120 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2121 ierr = VecResetArray(from);CHKERRQ(ierr); 2122 } 2123 } 2124 PetscFunctionReturn(0); 2125 } 2126 2127 /* uncomment for testing purposes */ 2128 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2129 #undef __FUNCT__ 2130 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2131 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2132 { 2133 PetscErrorCode ierr; 2134 PC_IS* pcis = (PC_IS*)(pc->data); 2135 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2136 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2137 /* one and zero */ 2138 PetscScalar one=1.0,zero=0.0; 2139 /* space to store constraints and their local indices */ 2140 PetscScalar *constraints_data; 2141 PetscInt *constraints_idxs,*constraints_idxs_B; 2142 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2143 PetscInt *constraints_n; 2144 /* iterators */ 2145 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2146 /* BLAS integers */ 2147 PetscBLASInt lwork,lierr; 2148 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2149 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2150 /* reuse */ 2151 PetscInt olocal_primal_size,olocal_primal_size_cc; 2152 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2153 /* change of basis */ 2154 PetscBool qr_needed; 2155 PetscBT change_basis,qr_needed_idx; 2156 /* auxiliary stuff */ 2157 PetscInt *nnz,*is_indices; 2158 PetscInt ncc; 2159 /* some quantities */ 2160 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2161 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2162 2163 PetscFunctionBegin; 2164 /* Destroy Mat objects computed previously */ 2165 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2166 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2167 /* save info on constraints from previous setup (if any) */ 2168 olocal_primal_size = pcbddc->local_primal_size; 2169 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2170 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2171 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2172 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2173 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2174 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2175 2176 /* print some info */ 2177 if (pcbddc->dbg_flag) { 2178 IS vertices; 2179 PetscInt nv,nedges,nfaces; 2180 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2181 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2182 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2183 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2184 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2187 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2188 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2189 } 2190 2191 if (!pcbddc->adaptive_selection) { 2192 IS ISForVertices,*ISForFaces,*ISForEdges; 2193 MatNullSpace nearnullsp; 2194 const Vec *nearnullvecs; 2195 Vec *localnearnullsp; 2196 PetscScalar *array; 2197 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2198 PetscBool nnsp_has_cnst; 2199 /* LAPACK working arrays for SVD or POD */ 2200 PetscBool skip_lapack,boolforchange; 2201 PetscScalar *work; 2202 PetscReal *singular_vals; 2203 #if defined(PETSC_USE_COMPLEX) 2204 PetscReal *rwork; 2205 #endif 2206 #if defined(PETSC_MISSING_LAPACK_GESVD) 2207 PetscScalar *temp_basis,*correlation_mat; 2208 #else 2209 PetscBLASInt dummy_int=1; 2210 PetscScalar dummy_scalar=1.; 2211 #endif 2212 2213 /* Get index sets for faces, edges and vertices from graph */ 2214 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2215 /* free unneeded index sets */ 2216 if (!pcbddc->use_vertices) { 2217 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2218 } 2219 if (!pcbddc->use_edges) { 2220 for (i=0;i<n_ISForEdges;i++) { 2221 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2222 } 2223 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2224 n_ISForEdges = 0; 2225 } 2226 if (!pcbddc->use_faces) { 2227 for (i=0;i<n_ISForFaces;i++) { 2228 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2229 } 2230 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2231 n_ISForFaces = 0; 2232 } 2233 2234 #if defined(PETSC_USE_DEBUG) 2235 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2236 Also use_change_of_basis should be consistent among processors */ 2237 if (pcbddc->NullSpace) { 2238 PetscBool tbool[2],gbool[2]; 2239 2240 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2241 pcbddc->use_change_of_basis = PETSC_TRUE; 2242 if (!ISForEdges) { 2243 pcbddc->use_change_on_faces = PETSC_TRUE; 2244 } 2245 } 2246 tbool[0] = pcbddc->use_change_of_basis; 2247 tbool[1] = pcbddc->use_change_on_faces; 2248 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2249 pcbddc->use_change_of_basis = gbool[0]; 2250 pcbddc->use_change_on_faces = gbool[1]; 2251 } 2252 #endif 2253 2254 /* check if near null space is attached to global mat */ 2255 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2256 if (nearnullsp) { 2257 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2258 /* remove any stored info */ 2259 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2260 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2261 /* store information for BDDC solver reuse */ 2262 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2263 pcbddc->onearnullspace = nearnullsp; 2264 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2265 for (i=0;i<nnsp_size;i++) { 2266 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2267 } 2268 } else { /* if near null space is not provided BDDC uses constants by default */ 2269 nnsp_size = 0; 2270 nnsp_has_cnst = PETSC_TRUE; 2271 } 2272 /* get max number of constraints on a single cc */ 2273 max_constraints = nnsp_size; 2274 if (nnsp_has_cnst) max_constraints++; 2275 2276 /* 2277 Evaluate maximum storage size needed by the procedure 2278 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2279 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2280 There can be multiple constraints per connected component 2281 */ 2282 n_vertices = 0; 2283 if (ISForVertices) { 2284 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2285 } 2286 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2287 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2288 2289 total_counts = n_ISForFaces+n_ISForEdges; 2290 total_counts *= max_constraints; 2291 total_counts += n_vertices; 2292 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2293 2294 total_counts = 0; 2295 max_size_of_constraint = 0; 2296 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2297 IS used_is; 2298 if (i<n_ISForEdges) { 2299 used_is = ISForEdges[i]; 2300 } else { 2301 used_is = ISForFaces[i-n_ISForEdges]; 2302 } 2303 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2304 total_counts += j; 2305 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2306 } 2307 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2308 2309 /* get local part of global near null space vectors */ 2310 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2311 for (k=0;k<nnsp_size;k++) { 2312 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2313 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2314 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2315 } 2316 2317 /* whether or not to skip lapack calls */ 2318 skip_lapack = PETSC_TRUE; 2319 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2320 2321 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2322 if (!skip_lapack) { 2323 PetscScalar temp_work; 2324 2325 #if defined(PETSC_MISSING_LAPACK_GESVD) 2326 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2327 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2328 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2329 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2330 #if defined(PETSC_USE_COMPLEX) 2331 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2332 #endif 2333 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2334 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2335 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2336 lwork = -1; 2337 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2338 #if !defined(PETSC_USE_COMPLEX) 2339 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2340 #else 2341 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2342 #endif 2343 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2344 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2345 #else /* on missing GESVD */ 2346 /* SVD */ 2347 PetscInt max_n,min_n; 2348 max_n = max_size_of_constraint; 2349 min_n = max_constraints; 2350 if (max_size_of_constraint < max_constraints) { 2351 min_n = max_size_of_constraint; 2352 max_n = max_constraints; 2353 } 2354 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2355 #if defined(PETSC_USE_COMPLEX) 2356 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2357 #endif 2358 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2359 lwork = -1; 2360 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2361 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2362 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2363 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2364 #if !defined(PETSC_USE_COMPLEX) 2365 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2366 #else 2367 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2368 #endif 2369 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2370 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2371 #endif /* on missing GESVD */ 2372 /* Allocate optimal workspace */ 2373 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2374 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2375 } 2376 /* Now we can loop on constraining sets */ 2377 total_counts = 0; 2378 constraints_idxs_ptr[0] = 0; 2379 constraints_data_ptr[0] = 0; 2380 /* vertices */ 2381 if (n_vertices) { 2382 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2383 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2384 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2385 for (i=0;i<n_vertices;i++) { 2386 constraints_n[total_counts] = 1; 2387 constraints_data[total_counts] = 1.0; 2388 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2389 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2390 total_counts++; 2391 } 2392 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2393 PetscBool used_vertex; 2394 for (i=0;i<n_vertices;i++) { 2395 used_vertex = PETSC_FALSE; 2396 k = 0; 2397 while (!used_vertex && k<nnsp_size) { 2398 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2399 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2400 constraints_n[total_counts] = 1; 2401 constraints_idxs[total_counts] = is_indices[i]; 2402 constraints_data[total_counts] = 1.0; 2403 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2404 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2405 total_counts++; 2406 used_vertex = PETSC_TRUE; 2407 } 2408 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2409 k++; 2410 } 2411 } 2412 } 2413 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2414 n_vertices = total_counts; 2415 } 2416 2417 /* edges and faces */ 2418 total_counts_cc = total_counts; 2419 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2420 IS used_is; 2421 PetscBool idxs_copied = PETSC_FALSE; 2422 2423 if (ncc<n_ISForEdges) { 2424 used_is = ISForEdges[ncc]; 2425 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2426 } else { 2427 used_is = ISForFaces[ncc-n_ISForEdges]; 2428 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2429 } 2430 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2431 2432 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2433 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2434 /* change of basis should not be performed on local periodic nodes */ 2435 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2436 if (nnsp_has_cnst) { 2437 PetscScalar quad_value; 2438 2439 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2440 idxs_copied = PETSC_TRUE; 2441 2442 if (!pcbddc->use_nnsp_true) { 2443 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2444 } else { 2445 quad_value = 1.0; 2446 } 2447 for (j=0;j<size_of_constraint;j++) { 2448 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2449 } 2450 temp_constraints++; 2451 total_counts++; 2452 } 2453 for (k=0;k<nnsp_size;k++) { 2454 PetscReal real_value; 2455 PetscScalar *ptr_to_data; 2456 2457 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2458 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2459 for (j=0;j<size_of_constraint;j++) { 2460 ptr_to_data[j] = array[is_indices[j]]; 2461 } 2462 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2463 /* check if array is null on the connected component */ 2464 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2465 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2466 if (real_value > 0.0) { /* keep indices and values */ 2467 temp_constraints++; 2468 total_counts++; 2469 if (!idxs_copied) { 2470 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2471 idxs_copied = PETSC_TRUE; 2472 } 2473 } 2474 } 2475 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2476 valid_constraints = temp_constraints; 2477 if (!pcbddc->use_nnsp_true && temp_constraints) { 2478 if (temp_constraints == 1) { /* just normalize the constraint */ 2479 PetscScalar norm,*ptr_to_data; 2480 2481 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2483 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2484 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2485 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2486 } else { /* perform SVD */ 2487 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2488 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2489 2490 #if defined(PETSC_MISSING_LAPACK_GESVD) 2491 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2492 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2493 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2494 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2495 from that computed using LAPACKgesvd 2496 -> This is due to a different computation of eigenvectors in LAPACKheev 2497 -> The quality of the POD-computed basis will be the same */ 2498 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2499 /* Store upper triangular part of correlation matrix */ 2500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2502 for (j=0;j<temp_constraints;j++) { 2503 for (k=0;k<j+1;k++) { 2504 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2505 } 2506 } 2507 /* compute eigenvalues and eigenvectors of correlation matrix */ 2508 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2509 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2510 #if !defined(PETSC_USE_COMPLEX) 2511 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2512 #else 2513 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2514 #endif 2515 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2516 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2517 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2518 j = 0; 2519 while (j < temp_constraints && singular_vals[j] < tol) j++; 2520 total_counts = total_counts-j; 2521 valid_constraints = temp_constraints-j; 2522 /* scale and copy POD basis into used quadrature memory */ 2523 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2524 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2525 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2526 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2527 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2529 if (j<temp_constraints) { 2530 PetscInt ii; 2531 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2532 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2533 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2534 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2535 for (k=0;k<temp_constraints-j;k++) { 2536 for (ii=0;ii<size_of_constraint;ii++) { 2537 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2538 } 2539 } 2540 } 2541 #else /* on missing GESVD */ 2542 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2543 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2544 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2545 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2546 #if !defined(PETSC_USE_COMPLEX) 2547 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2548 #else 2549 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2550 #endif 2551 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2552 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2553 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2554 k = temp_constraints; 2555 if (k > size_of_constraint) k = size_of_constraint; 2556 j = 0; 2557 while (j < k && singular_vals[k-j-1] < tol) j++; 2558 valid_constraints = k-j; 2559 total_counts = total_counts-temp_constraints+valid_constraints; 2560 #endif /* on missing GESVD */ 2561 } 2562 } 2563 /* update pointers information */ 2564 if (valid_constraints) { 2565 constraints_n[total_counts_cc] = valid_constraints; 2566 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2567 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2568 /* set change_of_basis flag */ 2569 if (boolforchange) { 2570 PetscBTSet(change_basis,total_counts_cc); 2571 } 2572 total_counts_cc++; 2573 } 2574 } 2575 /* free workspace */ 2576 if (!skip_lapack) { 2577 ierr = PetscFree(work);CHKERRQ(ierr); 2578 #if defined(PETSC_USE_COMPLEX) 2579 ierr = PetscFree(rwork);CHKERRQ(ierr); 2580 #endif 2581 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2582 #if defined(PETSC_MISSING_LAPACK_GESVD) 2583 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2584 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2585 #endif 2586 } 2587 for (k=0;k<nnsp_size;k++) { 2588 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2589 } 2590 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2591 /* free index sets of faces, edges and vertices */ 2592 for (i=0;i<n_ISForFaces;i++) { 2593 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2594 } 2595 if (n_ISForFaces) { 2596 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2597 } 2598 for (i=0;i<n_ISForEdges;i++) { 2599 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2600 } 2601 if (n_ISForEdges) { 2602 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2603 } 2604 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2605 } else { 2606 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2607 2608 total_counts = 0; 2609 n_vertices = 0; 2610 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2611 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2612 } 2613 max_constraints = 0; 2614 total_counts_cc = 0; 2615 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2616 total_counts += pcbddc->adaptive_constraints_n[i]; 2617 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2618 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2619 } 2620 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2621 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2622 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2623 constraints_data = pcbddc->adaptive_constraints_data; 2624 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2625 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2626 total_counts_cc = 0; 2627 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2628 if (pcbddc->adaptive_constraints_n[i]) { 2629 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2630 } 2631 } 2632 #if 0 2633 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2634 for (i=0;i<total_counts_cc;i++) { 2635 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2636 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2637 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2638 printf(" %d",constraints_idxs[j]); 2639 } 2640 printf("\n"); 2641 printf("number of cc: %d\n",constraints_n[i]); 2642 } 2643 for (i=0;i<n_vertices;i++) { 2644 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2645 } 2646 for (i=0;i<sub_schurs->n_subs;i++) { 2647 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2648 } 2649 #endif 2650 2651 max_size_of_constraint = 0; 2652 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2653 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2654 /* Change of basis */ 2655 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2656 if (pcbddc->use_change_of_basis) { 2657 for (i=0;i<sub_schurs->n_subs;i++) { 2658 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2659 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2660 } 2661 } 2662 } 2663 } 2664 pcbddc->local_primal_size = total_counts; 2665 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2666 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2667 2668 /* map constraints_idxs in boundary numbering */ 2669 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2670 if (i != constraints_idxs_ptr[total_counts_cc]) { 2671 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2672 } 2673 2674 /* Create constraint matrix */ 2675 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2676 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2677 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2678 2679 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2680 /* determine if a QR strategy is needed for change of basis */ 2681 qr_needed = PETSC_FALSE; 2682 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2683 total_primal_vertices=0; 2684 pcbddc->local_primal_size_cc = 0; 2685 for (i=0;i<total_counts_cc;i++) { 2686 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2687 if (size_of_constraint == 1) { 2688 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2689 pcbddc->local_primal_size_cc += 1; 2690 } else if (PetscBTLookup(change_basis,i)) { 2691 for (k=0;k<constraints_n[i];k++) { 2692 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2693 } 2694 pcbddc->local_primal_size_cc += constraints_n[i]; 2695 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2696 PetscBTSet(qr_needed_idx,i); 2697 qr_needed = PETSC_TRUE; 2698 } 2699 } else { 2700 pcbddc->local_primal_size_cc += 1; 2701 } 2702 } 2703 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2704 pcbddc->n_vertices = total_primal_vertices; 2705 /* permute indices in order to have a sorted set of vertices */ 2706 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2707 2708 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2709 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2710 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2711 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2712 2713 /* nonzero structure of constraint matrix */ 2714 /* and get reference dof for local constraints */ 2715 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2716 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2717 2718 j = total_primal_vertices; 2719 total_counts = total_primal_vertices; 2720 cum = total_primal_vertices; 2721 for (i=n_vertices;i<total_counts_cc;i++) { 2722 if (!PetscBTLookup(change_basis,i)) { 2723 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2724 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2725 cum++; 2726 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2727 for (k=0;k<constraints_n[i];k++) { 2728 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2729 nnz[j+k] = size_of_constraint; 2730 } 2731 j += constraints_n[i]; 2732 } 2733 } 2734 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2735 ierr = PetscFree(nnz);CHKERRQ(ierr); 2736 2737 /* set values in constraint matrix */ 2738 for (i=0;i<total_primal_vertices;i++) { 2739 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2740 } 2741 total_counts = total_primal_vertices; 2742 for (i=n_vertices;i<total_counts_cc;i++) { 2743 if (!PetscBTLookup(change_basis,i)) { 2744 PetscInt *cols; 2745 2746 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2747 cols = constraints_idxs+constraints_idxs_ptr[i]; 2748 for (k=0;k<constraints_n[i];k++) { 2749 PetscInt row = total_counts+k; 2750 PetscScalar *vals; 2751 2752 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2753 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2754 } 2755 total_counts += constraints_n[i]; 2756 } 2757 } 2758 /* assembling */ 2759 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2760 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2761 2762 /* 2763 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2764 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2765 */ 2766 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2767 if (pcbddc->use_change_of_basis) { 2768 /* dual and primal dofs on a single cc */ 2769 PetscInt dual_dofs,primal_dofs; 2770 /* working stuff for GEQRF */ 2771 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2772 PetscBLASInt lqr_work; 2773 /* working stuff for UNGQR */ 2774 PetscScalar *gqr_work,lgqr_work_t; 2775 PetscBLASInt lgqr_work; 2776 /* working stuff for TRTRS */ 2777 PetscScalar *trs_rhs; 2778 PetscBLASInt Blas_NRHS; 2779 /* pointers for values insertion into change of basis matrix */ 2780 PetscInt *start_rows,*start_cols; 2781 PetscScalar *start_vals; 2782 /* working stuff for values insertion */ 2783 PetscBT is_primal; 2784 PetscInt *aux_primal_numbering_B; 2785 /* matrix sizes */ 2786 PetscInt global_size,local_size; 2787 /* temporary change of basis */ 2788 Mat localChangeOfBasisMatrix; 2789 /* extra space for debugging */ 2790 PetscScalar *dbg_work; 2791 2792 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2793 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2794 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2795 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2796 /* nonzeros for local mat */ 2797 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2798 for (i=0;i<pcis->n;i++) nnz[i]=1; 2799 for (i=n_vertices;i<total_counts_cc;i++) { 2800 if (PetscBTLookup(change_basis,i)) { 2801 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2802 if (PetscBTLookup(qr_needed_idx,i)) { 2803 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2804 } else { 2805 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2806 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2807 } 2808 } 2809 } 2810 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2811 ierr = PetscFree(nnz);CHKERRQ(ierr); 2812 /* Set initial identity in the matrix */ 2813 for (i=0;i<pcis->n;i++) { 2814 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2815 } 2816 2817 if (pcbddc->dbg_flag) { 2818 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2819 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2820 } 2821 2822 2823 /* Now we loop on the constraints which need a change of basis */ 2824 /* 2825 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2826 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2827 2828 Basic blocks of change of basis matrix T computed by 2829 2830 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2831 2832 | 1 0 ... 0 s_1/S | 2833 | 0 1 ... 0 s_2/S | 2834 | ... | 2835 | 0 ... 1 s_{n-1}/S | 2836 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2837 2838 with S = \sum_{i=1}^n s_i^2 2839 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2840 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2841 2842 - QR decomposition of constraints otherwise 2843 */ 2844 if (qr_needed) { 2845 /* space to store Q */ 2846 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2847 /* first we issue queries for optimal work */ 2848 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2849 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2850 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2851 lqr_work = -1; 2852 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2853 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2854 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2855 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2856 lgqr_work = -1; 2857 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2858 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2859 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2860 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2861 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2862 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2863 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2864 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2865 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2866 /* array to store scaling factors for reflectors */ 2867 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2868 /* array to store rhs and solution of triangular solver */ 2869 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2870 /* allocating workspace for check */ 2871 if (pcbddc->dbg_flag) { 2872 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2873 } 2874 } 2875 /* array to store whether a node is primal or not */ 2876 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2877 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2878 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2879 if (i != total_primal_vertices) { 2880 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2881 } 2882 for (i=0;i<total_primal_vertices;i++) { 2883 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2884 } 2885 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2886 2887 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2888 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2889 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2890 if (PetscBTLookup(change_basis,total_counts)) { 2891 /* get constraint info */ 2892 primal_dofs = constraints_n[total_counts]; 2893 dual_dofs = size_of_constraint-primal_dofs; 2894 2895 if (pcbddc->dbg_flag) { 2896 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2897 } 2898 2899 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2900 2901 /* copy quadrature constraints for change of basis check */ 2902 if (pcbddc->dbg_flag) { 2903 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2904 } 2905 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2906 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2907 2908 /* compute QR decomposition of constraints */ 2909 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2910 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2911 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2912 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2913 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2914 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2915 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2916 2917 /* explictly compute R^-T */ 2918 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2919 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2920 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2921 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2922 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2923 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2924 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2925 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2926 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2927 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2928 2929 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2930 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2931 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2932 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2933 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2934 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2935 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2936 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2937 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2938 2939 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2940 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2941 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2942 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2943 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2944 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2946 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2947 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2948 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2949 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2950 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2951 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2952 2953 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2954 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2955 /* insert cols for primal dofs */ 2956 for (j=0;j<primal_dofs;j++) { 2957 start_vals = &qr_basis[j*size_of_constraint]; 2958 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2959 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2960 } 2961 /* insert cols for dual dofs */ 2962 for (j=0,k=0;j<dual_dofs;k++) { 2963 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2964 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2965 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2966 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2967 j++; 2968 } 2969 } 2970 2971 /* check change of basis */ 2972 if (pcbddc->dbg_flag) { 2973 PetscInt ii,jj; 2974 PetscBool valid_qr=PETSC_TRUE; 2975 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2977 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2978 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2979 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2980 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2981 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2982 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2983 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2984 for (jj=0;jj<size_of_constraint;jj++) { 2985 for (ii=0;ii<primal_dofs;ii++) { 2986 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2987 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2988 } 2989 } 2990 if (!valid_qr) { 2991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2992 for (jj=0;jj<size_of_constraint;jj++) { 2993 for (ii=0;ii<primal_dofs;ii++) { 2994 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2995 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2996 } 2997 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2998 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2999 } 3000 } 3001 } 3002 } else { 3003 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3004 } 3005 } 3006 } else { /* simple transformation block */ 3007 PetscInt row,col; 3008 PetscScalar val,norm; 3009 3010 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3011 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3012 for (j=0;j<size_of_constraint;j++) { 3013 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3014 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3015 if (!PetscBTLookup(is_primal,row_B)) { 3016 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3017 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3018 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3019 } else { 3020 for (k=0;k<size_of_constraint;k++) { 3021 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3022 if (row != col) { 3023 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3024 } else { 3025 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3026 } 3027 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3028 } 3029 } 3030 } 3031 if (pcbddc->dbg_flag) { 3032 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3033 } 3034 } 3035 } else { 3036 if (pcbddc->dbg_flag) { 3037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3038 } 3039 } 3040 } 3041 3042 /* free workspace */ 3043 if (qr_needed) { 3044 if (pcbddc->dbg_flag) { 3045 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3046 } 3047 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3048 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3049 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3050 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3051 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3052 } 3053 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3054 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3055 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3056 3057 /* assembling of global change of variable */ 3058 { 3059 Mat tmat; 3060 PetscInt bs; 3061 3062 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3063 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3064 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3065 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3066 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3067 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3068 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3069 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3070 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3071 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3072 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3073 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3074 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3075 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3076 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3077 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3078 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3079 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3080 } 3081 /* check */ 3082 if (pcbddc->dbg_flag) { 3083 PetscReal error; 3084 Vec x,x_change; 3085 3086 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3087 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3088 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3089 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3090 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3091 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3092 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3093 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3094 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3095 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3096 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3097 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3099 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3100 ierr = VecDestroy(&x);CHKERRQ(ierr); 3101 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3102 } 3103 3104 /* adapt sub_schurs computed (if any) */ 3105 if (pcbddc->use_deluxe_scaling) { 3106 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3107 if (sub_schurs->S_Ej_all) { 3108 Mat S_new,tmat; 3109 IS is_all_N; 3110 3111 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3112 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3113 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3114 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3115 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3116 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3117 sub_schurs->S_Ej_all = S_new; 3118 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3119 if (sub_schurs->sum_S_Ej_all) { 3120 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3121 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3122 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3123 sub_schurs->sum_S_Ej_all = S_new; 3124 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3125 } 3126 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3127 } 3128 } 3129 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3130 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3131 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3132 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3133 } 3134 3135 /* set up change of basis context */ 3136 if (pcbddc->ChangeOfBasisMatrix) { 3137 PCBDDCChange_ctx change_ctx; 3138 3139 if (!pcbddc->new_global_mat) { 3140 PetscInt global_size,local_size; 3141 3142 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3143 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3144 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3145 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3146 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3147 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3148 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3149 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3150 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3151 } else { 3152 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3153 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3154 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3155 } 3156 if (!pcbddc->user_ChangeOfBasisMatrix) { 3157 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3158 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3159 } else { 3160 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3161 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3162 } 3163 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3164 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3165 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3166 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3167 } 3168 3169 /* add pressure dof to set of primal nodes for numbering purposes */ 3170 if (pcbddc->zerodiag) { 3171 const PetscInt *idxs; 3172 PetscInt nz; 3173 3174 ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr); 3175 ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr); 3176 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = idxs[nz-1]; 3177 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = idxs[nz-1]; 3178 ierr = ISRestoreIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr); 3179 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3180 pcbddc->local_primal_size_cc++; 3181 pcbddc->local_primal_size++; 3182 } 3183 3184 /* check if a new primal space has been introduced (also take into account benign trick) */ 3185 pcbddc->new_primal_space_local = PETSC_TRUE; 3186 if (olocal_primal_size == pcbddc->local_primal_size) { 3187 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3188 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3189 if (!pcbddc->new_primal_space_local) { 3190 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3191 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3192 } 3193 } 3194 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3195 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3196 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3197 3198 /* flush dbg viewer */ 3199 if (pcbddc->dbg_flag) { 3200 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3201 } 3202 3203 /* free workspace */ 3204 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3205 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3206 if (!pcbddc->adaptive_selection) { 3207 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3208 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3209 } else { 3210 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3211 pcbddc->adaptive_constraints_idxs_ptr, 3212 pcbddc->adaptive_constraints_data_ptr, 3213 pcbddc->adaptive_constraints_idxs, 3214 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3215 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3216 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3217 } 3218 PetscFunctionReturn(0); 3219 } 3220 3221 #undef __FUNCT__ 3222 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3223 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3224 { 3225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3226 PC_IS *pcis = (PC_IS*)pc->data; 3227 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3228 PetscInt ierr,i,vertex_size,N; 3229 PetscViewer viewer=pcbddc->dbg_viewer; 3230 3231 PetscFunctionBegin; 3232 /* Reset previously computed graph */ 3233 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3234 /* Init local Graph struct */ 3235 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3236 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3237 3238 /* Check validity of the csr graph passed in by the user */ 3239 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3240 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3241 } 3242 3243 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3244 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3245 PetscInt *xadj,*adjncy; 3246 PetscInt nvtxs; 3247 PetscBool flg_row=PETSC_FALSE; 3248 3249 if (pcbddc->use_local_adj) { 3250 3251 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3252 if (flg_row) { 3253 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3254 pcbddc->computed_rowadj = PETSC_TRUE; 3255 } 3256 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3257 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3258 IS is_dummy; 3259 ISLocalToGlobalMapping l2gmap_dummy; 3260 PetscInt j,sum; 3261 PetscInt *cxadj,*cadjncy; 3262 const PetscInt *idxs; 3263 PCBDDCGraph graph; 3264 PetscBT is_on_boundary; 3265 3266 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3267 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3268 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3269 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3270 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3271 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3272 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3273 if (flg_row) { 3274 graph->xadj = xadj; 3275 graph->adjncy = adjncy; 3276 } 3277 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3278 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3279 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3280 3281 if (pcbddc->dbg_flag) { 3282 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3283 for (i=0;i<graph->ncc;i++) { 3284 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3285 } 3286 } 3287 3288 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3289 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3290 for (i=0;i<pcis->n_B;i++) { 3291 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3292 } 3293 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3294 3295 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3296 sum = 0; 3297 for (i=0;i<graph->ncc;i++) { 3298 PetscInt sizecc = 0; 3299 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3300 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3301 sizecc++; 3302 } 3303 } 3304 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3305 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3306 cxadj[graph->queue[j]] = sizecc; 3307 } 3308 } 3309 sum += sizecc*sizecc; 3310 } 3311 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3312 sum = 0; 3313 for (i=0;i<pcis->n;i++) { 3314 PetscInt temp = cxadj[i]; 3315 cxadj[i] = sum; 3316 sum += temp; 3317 } 3318 cxadj[pcis->n] = sum; 3319 for (i=0;i<graph->ncc;i++) { 3320 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3321 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3322 PetscInt k,sizecc = 0; 3323 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3324 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3325 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3326 sizecc++; 3327 } 3328 } 3329 } 3330 } 3331 } 3332 if (sum) { 3333 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3334 } else { 3335 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3336 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3337 } 3338 graph->xadj = 0; 3339 graph->adjncy = 0; 3340 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3341 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3342 } 3343 } 3344 if (pcbddc->dbg_flag) { 3345 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3346 } 3347 3348 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3349 vertex_size = 1; 3350 if (pcbddc->user_provided_isfordofs) { 3351 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3352 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3353 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3354 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3355 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3356 } 3357 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3358 pcbddc->n_ISForDofs = 0; 3359 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3360 } 3361 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3362 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3363 } else { 3364 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3365 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3366 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3367 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3368 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3369 } 3370 } 3371 } 3372 3373 /* Setup of Graph */ 3374 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3375 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3376 } 3377 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3378 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3379 } 3380 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3381 3382 /* Graph's connected components analysis */ 3383 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3384 3385 /* print some info to stdout */ 3386 if (pcbddc->dbg_flag) { 3387 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3388 } 3389 3390 /* mark topography has done */ 3391 pcbddc->recompute_topography = PETSC_FALSE; 3392 PetscFunctionReturn(0); 3393 } 3394 3395 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3396 #undef __FUNCT__ 3397 #define __FUNCT__ "PCBDDCSubsetNumbering" 3398 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3399 { 3400 PetscSF sf; 3401 PetscLayout map; 3402 const PetscInt *idxs; 3403 PetscInt *leaf_data,*root_data,*gidxs; 3404 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3405 PetscInt n_n,nlocals,start,first_index; 3406 PetscMPIInt commsize; 3407 PetscBool first_found; 3408 PetscErrorCode ierr; 3409 3410 PetscFunctionBegin; 3411 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3412 if (subset_mult) { 3413 PetscCheckSameComm(subset,1,subset_mult,2); 3414 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3415 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3416 } 3417 /* create workspace layout for computing global indices of subset */ 3418 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3419 lbounds[0] = lbounds[1] = 0; 3420 for (i=0;i<n;i++) { 3421 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3422 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3423 } 3424 lbounds[0] = -lbounds[0]; 3425 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3426 gbounds[0] = -gbounds[0]; 3427 N = gbounds[1] - gbounds[0] + 1; 3428 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3429 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3430 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3431 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3432 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3433 3434 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3435 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3436 if (subset_mult) { 3437 const PetscInt* idxs_mult; 3438 3439 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3440 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3441 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3442 } else { 3443 for (i=0;i<n;i++) leaf_data[i] = 1; 3444 } 3445 /* local size of new subset */ 3446 n_n = 0; 3447 for (i=0;i<n;i++) n_n += leaf_data[i]; 3448 3449 /* global indexes in layout */ 3450 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3451 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3452 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3453 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3454 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3455 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3456 3457 /* reduce from leaves to roots */ 3458 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3459 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3460 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3461 3462 /* count indexes in local part of layout */ 3463 nlocals = 0; 3464 first_index = -1; 3465 first_found = PETSC_FALSE; 3466 for (i=0;i<Nl;i++) { 3467 if (!first_found && root_data[i]) { 3468 first_found = PETSC_TRUE; 3469 first_index = i; 3470 } 3471 nlocals += root_data[i]; 3472 } 3473 3474 /* cumulative of number of indexes and size of subset without holes */ 3475 #if defined(PETSC_HAVE_MPI_EXSCAN) 3476 start = 0; 3477 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3478 #else 3479 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3480 start = start-nlocals; 3481 #endif 3482 3483 if (N_n) { /* compute total size of new subset if requested */ 3484 *N_n = start + nlocals; 3485 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3486 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3487 } 3488 3489 /* adapt root data with cumulative */ 3490 if (first_found) { 3491 PetscInt old_index; 3492 3493 root_data[first_index] += start; 3494 old_index = first_index; 3495 for (i=first_index+1;i<Nl;i++) { 3496 if (root_data[i]) { 3497 root_data[i] += root_data[old_index]; 3498 old_index = i; 3499 } 3500 } 3501 } 3502 3503 /* from roots to leaves */ 3504 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3505 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3506 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3507 3508 /* create new IS with global indexes without holes */ 3509 if (subset_mult) { 3510 const PetscInt* idxs_mult; 3511 PetscInt cum; 3512 3513 cum = 0; 3514 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3515 for (i=0;i<n;i++) { 3516 PetscInt j; 3517 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3518 } 3519 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3520 } else { 3521 for (i=0;i<n;i++) { 3522 gidxs[i] = leaf_data[i]-1; 3523 } 3524 } 3525 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3526 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3527 PetscFunctionReturn(0); 3528 } 3529 3530 #undef __FUNCT__ 3531 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3532 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3533 { 3534 PetscInt i,j; 3535 PetscScalar *alphas; 3536 PetscErrorCode ierr; 3537 3538 PetscFunctionBegin; 3539 /* this implements stabilized Gram-Schmidt */ 3540 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3541 for (i=0;i<n;i++) { 3542 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3543 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3544 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3545 } 3546 ierr = PetscFree(alphas);CHKERRQ(ierr); 3547 PetscFunctionReturn(0); 3548 } 3549 3550 #undef __FUNCT__ 3551 #define __FUNCT__ "MatISGetSubassemblingPattern" 3552 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3553 { 3554 IS ranks_send_to; 3555 PetscInt n_neighs,*neighs,*n_shared,**shared; 3556 PetscMPIInt size,rank,color; 3557 PetscInt *xadj,*adjncy; 3558 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3559 PetscInt i,local_size,threshold=0; 3560 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3561 PetscSubcomm subcomm; 3562 PetscErrorCode ierr; 3563 3564 PetscFunctionBegin; 3565 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3566 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3567 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3568 3569 /* Get info on mapping */ 3570 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3571 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3572 3573 /* build local CSR graph of subdomains' connectivity */ 3574 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3575 xadj[0] = 0; 3576 xadj[1] = PetscMax(n_neighs-1,0); 3577 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3578 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3579 3580 if (threshold) { 3581 PetscInt xadj_count = 0; 3582 for (i=1;i<n_neighs;i++) { 3583 if (n_shared[i] > threshold) { 3584 adjncy[xadj_count] = neighs[i]; 3585 adjncy_wgt[xadj_count] = n_shared[i]; 3586 xadj_count++; 3587 } 3588 } 3589 xadj[1] = xadj_count; 3590 } else { 3591 if (xadj[1]) { 3592 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3593 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3594 } 3595 } 3596 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3597 if (use_square) { 3598 for (i=0;i<xadj[1];i++) { 3599 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3600 } 3601 } 3602 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3603 3604 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3605 3606 /* 3607 Restrict work on active processes only. 3608 */ 3609 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3610 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3611 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3612 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3613 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3614 if (color) { 3615 ierr = PetscFree(xadj);CHKERRQ(ierr); 3616 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3617 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3618 } else { 3619 Mat subdomain_adj; 3620 IS new_ranks,new_ranks_contig; 3621 MatPartitioning partitioner; 3622 PetscInt prank,rstart=0,rend=0; 3623 PetscInt *is_indices,*oldranks; 3624 PetscBool aggregate; 3625 3626 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3627 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3628 prank = rank; 3629 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3630 /* 3631 for (i=0;i<size;i++) { 3632 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3633 } 3634 */ 3635 for (i=0;i<xadj[1];i++) { 3636 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3637 } 3638 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3639 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3640 if (aggregate) { 3641 PetscInt lrows,row,ncols,*cols; 3642 PetscMPIInt nrank; 3643 PetscScalar *vals; 3644 3645 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3646 lrows = 0; 3647 if (nrank<redprocs) { 3648 lrows = size/redprocs; 3649 if (nrank<size%redprocs) lrows++; 3650 } 3651 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3652 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3653 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3654 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3655 row = nrank; 3656 ncols = xadj[1]-xadj[0]; 3657 cols = adjncy; 3658 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3659 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3660 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3661 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3662 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3663 ierr = PetscFree(xadj);CHKERRQ(ierr); 3664 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3665 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3666 ierr = PetscFree(vals);CHKERRQ(ierr); 3667 } else { 3668 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3669 } 3670 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3671 3672 /* Partition */ 3673 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3674 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3675 if (use_vwgt) { 3676 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3677 v_wgt[0] = local_size; 3678 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3679 } 3680 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3681 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3682 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3683 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3684 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3685 3686 /* renumber new_ranks to avoid "holes" in new set of processors */ 3687 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3688 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3689 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3690 if (!redprocs) { 3691 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3692 } else { 3693 PetscInt idxs[1]; 3694 PetscMPIInt tag; 3695 MPI_Request *reqs; 3696 3697 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3698 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3699 for (i=rstart;i<rend;i++) { 3700 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3701 } 3702 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3703 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3704 ierr = PetscFree(reqs);CHKERRQ(ierr); 3705 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3706 } 3707 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3708 /* clean up */ 3709 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3710 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3711 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3712 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3713 } 3714 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3715 3716 /* assemble parallel IS for sends */ 3717 i = 1; 3718 if (color) i=0; 3719 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3720 /* get back IS */ 3721 *is_sends = ranks_send_to; 3722 PetscFunctionReturn(0); 3723 } 3724 3725 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3726 3727 #undef __FUNCT__ 3728 #define __FUNCT__ "MatISSubassemble" 3729 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3730 { 3731 Mat local_mat; 3732 IS is_sends_internal; 3733 PetscInt rows,cols,new_local_rows; 3734 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3735 PetscBool ismatis,isdense,newisdense,destroy_mat; 3736 ISLocalToGlobalMapping l2gmap; 3737 PetscInt* l2gmap_indices; 3738 const PetscInt* is_indices; 3739 MatType new_local_type; 3740 /* buffers */ 3741 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3742 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3743 PetscInt *recv_buffer_idxs_local; 3744 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3745 /* MPI */ 3746 MPI_Comm comm,comm_n; 3747 PetscSubcomm subcomm; 3748 PetscMPIInt n_sends,n_recvs,commsize; 3749 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3750 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3751 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3752 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3753 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3754 PetscErrorCode ierr; 3755 3756 PetscFunctionBegin; 3757 /* TODO: add missing checks */ 3758 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3759 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3760 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3761 PetscValidLogicalCollectiveInt(mat,nis,7); 3762 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3763 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3764 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3765 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3766 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3767 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3768 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3769 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3770 PetscInt mrows,mcols,mnrows,mncols; 3771 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3772 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3773 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3774 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3775 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3776 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3777 } 3778 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3779 PetscValidLogicalCollectiveInt(mat,bs,0); 3780 /* prepare IS for sending if not provided */ 3781 if (!is_sends) { 3782 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3783 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3784 } else { 3785 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3786 is_sends_internal = is_sends; 3787 } 3788 3789 /* get comm */ 3790 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3791 3792 /* compute number of sends */ 3793 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3794 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3795 3796 /* compute number of receives */ 3797 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3798 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3799 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3800 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3801 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3802 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3803 ierr = PetscFree(iflags);CHKERRQ(ierr); 3804 3805 /* restrict comm if requested */ 3806 subcomm = 0; 3807 destroy_mat = PETSC_FALSE; 3808 if (restrict_comm) { 3809 PetscMPIInt color,subcommsize; 3810 3811 color = 0; 3812 if (restrict_full) { 3813 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3814 } else { 3815 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3816 } 3817 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3818 subcommsize = commsize - subcommsize; 3819 /* check if reuse has been requested */ 3820 if (reuse == MAT_REUSE_MATRIX) { 3821 if (*mat_n) { 3822 PetscMPIInt subcommsize2; 3823 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3824 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3825 comm_n = PetscObjectComm((PetscObject)*mat_n); 3826 } else { 3827 comm_n = PETSC_COMM_SELF; 3828 } 3829 } else { /* MAT_INITIAL_MATRIX */ 3830 PetscMPIInt rank; 3831 3832 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3833 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3834 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3835 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3836 comm_n = PetscSubcommChild(subcomm); 3837 } 3838 /* flag to destroy *mat_n if not significative */ 3839 if (color) destroy_mat = PETSC_TRUE; 3840 } else { 3841 comm_n = comm; 3842 } 3843 3844 /* prepare send/receive buffers */ 3845 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3846 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3847 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3848 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3849 if (nis) { 3850 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3851 } 3852 3853 /* Get data from local matrices */ 3854 if (!isdense) { 3855 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3856 /* TODO: See below some guidelines on how to prepare the local buffers */ 3857 /* 3858 send_buffer_vals should contain the raw values of the local matrix 3859 send_buffer_idxs should contain: 3860 - MatType_PRIVATE type 3861 - PetscInt size_of_l2gmap 3862 - PetscInt global_row_indices[size_of_l2gmap] 3863 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3864 */ 3865 } else { 3866 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3867 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3868 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3869 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3870 send_buffer_idxs[1] = i; 3871 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3872 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3873 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3874 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3875 for (i=0;i<n_sends;i++) { 3876 ilengths_vals[is_indices[i]] = len*len; 3877 ilengths_idxs[is_indices[i]] = len+2; 3878 } 3879 } 3880 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3881 /* additional is (if any) */ 3882 if (nis) { 3883 PetscMPIInt psum; 3884 PetscInt j; 3885 for (j=0,psum=0;j<nis;j++) { 3886 PetscInt plen; 3887 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3888 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3889 psum += len+1; /* indices + lenght */ 3890 } 3891 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3892 for (j=0,psum=0;j<nis;j++) { 3893 PetscInt plen; 3894 const PetscInt *is_array_idxs; 3895 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3896 send_buffer_idxs_is[psum] = plen; 3897 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3898 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3899 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3900 psum += plen+1; /* indices + lenght */ 3901 } 3902 for (i=0;i<n_sends;i++) { 3903 ilengths_idxs_is[is_indices[i]] = psum; 3904 } 3905 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3906 } 3907 3908 buf_size_idxs = 0; 3909 buf_size_vals = 0; 3910 buf_size_idxs_is = 0; 3911 for (i=0;i<n_recvs;i++) { 3912 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3913 buf_size_vals += (PetscInt)olengths_vals[i]; 3914 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3915 } 3916 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3917 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3918 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3919 3920 /* get new tags for clean communications */ 3921 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3922 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3923 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3924 3925 /* allocate for requests */ 3926 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3927 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3928 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3929 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3930 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3931 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3932 3933 /* communications */ 3934 ptr_idxs = recv_buffer_idxs; 3935 ptr_vals = recv_buffer_vals; 3936 ptr_idxs_is = recv_buffer_idxs_is; 3937 for (i=0;i<n_recvs;i++) { 3938 source_dest = onodes[i]; 3939 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3940 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3941 ptr_idxs += olengths_idxs[i]; 3942 ptr_vals += olengths_vals[i]; 3943 if (nis) { 3944 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3945 ptr_idxs_is += olengths_idxs_is[i]; 3946 } 3947 } 3948 for (i=0;i<n_sends;i++) { 3949 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3950 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3951 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3952 if (nis) { 3953 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3954 } 3955 } 3956 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3957 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3958 3959 /* assemble new l2g map */ 3960 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3961 ptr_idxs = recv_buffer_idxs; 3962 new_local_rows = 0; 3963 for (i=0;i<n_recvs;i++) { 3964 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3965 ptr_idxs += olengths_idxs[i]; 3966 } 3967 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3968 ptr_idxs = recv_buffer_idxs; 3969 new_local_rows = 0; 3970 for (i=0;i<n_recvs;i++) { 3971 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3972 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3973 ptr_idxs += olengths_idxs[i]; 3974 } 3975 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3976 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3977 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3978 3979 /* infer new local matrix type from received local matrices type */ 3980 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3981 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3982 if (n_recvs) { 3983 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3984 ptr_idxs = recv_buffer_idxs; 3985 for (i=0;i<n_recvs;i++) { 3986 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3987 new_local_type_private = MATAIJ_PRIVATE; 3988 break; 3989 } 3990 ptr_idxs += olengths_idxs[i]; 3991 } 3992 switch (new_local_type_private) { 3993 case MATDENSE_PRIVATE: 3994 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3995 new_local_type = MATSEQAIJ; 3996 bs = 1; 3997 } else { /* if I receive only 1 dense matrix */ 3998 new_local_type = MATSEQDENSE; 3999 bs = 1; 4000 } 4001 break; 4002 case MATAIJ_PRIVATE: 4003 new_local_type = MATSEQAIJ; 4004 bs = 1; 4005 break; 4006 case MATBAIJ_PRIVATE: 4007 new_local_type = MATSEQBAIJ; 4008 break; 4009 case MATSBAIJ_PRIVATE: 4010 new_local_type = MATSEQSBAIJ; 4011 break; 4012 default: 4013 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4014 break; 4015 } 4016 } else { /* by default, new_local_type is seqdense */ 4017 new_local_type = MATSEQDENSE; 4018 bs = 1; 4019 } 4020 4021 /* create MATIS object if needed */ 4022 if (reuse == MAT_INITIAL_MATRIX) { 4023 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4024 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4025 } else { 4026 /* it also destroys the local matrices */ 4027 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4028 } 4029 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4030 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4031 4032 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4033 4034 /* Global to local map of received indices */ 4035 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4036 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4037 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4038 4039 /* restore attributes -> type of incoming data and its size */ 4040 buf_size_idxs = 0; 4041 for (i=0;i<n_recvs;i++) { 4042 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4043 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4044 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4045 } 4046 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4047 4048 /* set preallocation */ 4049 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4050 if (!newisdense) { 4051 PetscInt *new_local_nnz=0; 4052 4053 ptr_vals = recv_buffer_vals; 4054 ptr_idxs = recv_buffer_idxs_local; 4055 if (n_recvs) { 4056 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4057 } 4058 for (i=0;i<n_recvs;i++) { 4059 PetscInt j; 4060 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4061 for (j=0;j<*(ptr_idxs+1);j++) { 4062 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4063 } 4064 } else { 4065 /* TODO */ 4066 } 4067 ptr_idxs += olengths_idxs[i]; 4068 } 4069 if (new_local_nnz) { 4070 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4071 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4072 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4073 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4074 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4075 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4076 } else { 4077 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4078 } 4079 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4080 } else { 4081 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4082 } 4083 4084 /* set values */ 4085 ptr_vals = recv_buffer_vals; 4086 ptr_idxs = recv_buffer_idxs_local; 4087 for (i=0;i<n_recvs;i++) { 4088 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4089 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4090 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4091 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4092 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4093 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4094 } else { 4095 /* TODO */ 4096 } 4097 ptr_idxs += olengths_idxs[i]; 4098 ptr_vals += olengths_vals[i]; 4099 } 4100 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4101 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4102 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4103 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4104 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4105 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4106 4107 #if 0 4108 if (!restrict_comm) { /* check */ 4109 Vec lvec,rvec; 4110 PetscReal infty_error; 4111 4112 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4113 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4114 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4115 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4116 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4117 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4118 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4119 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4120 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4121 } 4122 #endif 4123 4124 /* assemble new additional is (if any) */ 4125 if (nis) { 4126 PetscInt **temp_idxs,*count_is,j,psum; 4127 4128 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4129 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4130 ptr_idxs = recv_buffer_idxs_is; 4131 psum = 0; 4132 for (i=0;i<n_recvs;i++) { 4133 for (j=0;j<nis;j++) { 4134 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4135 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4136 psum += plen; 4137 ptr_idxs += plen+1; /* shift pointer to received data */ 4138 } 4139 } 4140 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4141 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4142 for (i=1;i<nis;i++) { 4143 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4144 } 4145 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4146 ptr_idxs = recv_buffer_idxs_is; 4147 for (i=0;i<n_recvs;i++) { 4148 for (j=0;j<nis;j++) { 4149 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4150 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4151 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4152 ptr_idxs += plen+1; /* shift pointer to received data */ 4153 } 4154 } 4155 for (i=0;i<nis;i++) { 4156 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4157 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4158 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4159 } 4160 ierr = PetscFree(count_is);CHKERRQ(ierr); 4161 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4162 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4163 } 4164 /* free workspace */ 4165 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4166 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4167 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4168 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4169 if (isdense) { 4170 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4171 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4172 } else { 4173 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4174 } 4175 if (nis) { 4176 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4177 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4178 } 4179 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4180 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4181 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4182 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4183 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4184 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4185 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4186 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4187 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4188 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4189 ierr = PetscFree(onodes);CHKERRQ(ierr); 4190 if (nis) { 4191 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4192 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4193 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4194 } 4195 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4196 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4197 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4198 for (i=0;i<nis;i++) { 4199 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4200 } 4201 *mat_n = NULL; 4202 } 4203 PetscFunctionReturn(0); 4204 } 4205 4206 /* temporary hack into ksp private data structure */ 4207 #include <petsc/private/kspimpl.h> 4208 4209 #undef __FUNCT__ 4210 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4211 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4212 { 4213 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4214 PC_IS *pcis = (PC_IS*)pc->data; 4215 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4216 MatNullSpace CoarseNullSpace=NULL; 4217 ISLocalToGlobalMapping coarse_islg; 4218 IS coarse_is,*isarray; 4219 PetscInt i,im_active=-1,active_procs=-1; 4220 PetscInt nis,nisdofs,nisneu; 4221 PC pc_temp; 4222 PCType coarse_pc_type; 4223 KSPType coarse_ksp_type; 4224 PetscBool multilevel_requested,multilevel_allowed; 4225 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4226 Mat t_coarse_mat_is; 4227 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4228 PetscMPIInt all_procs; 4229 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4230 PetscBool compute_vecs = PETSC_FALSE; 4231 PetscScalar *array; 4232 PetscErrorCode ierr; 4233 4234 PetscFunctionBegin; 4235 /* Assign global numbering to coarse dofs */ 4236 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4237 PetscInt ocoarse_size; 4238 compute_vecs = PETSC_TRUE; 4239 ocoarse_size = pcbddc->coarse_size; 4240 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4241 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4242 /* see if we can avoid some work */ 4243 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4244 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4245 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4246 PC pc; 4247 PetscBool isbddc; 4248 4249 /* temporary workaround since PCBDDC does not have a reset method so far */ 4250 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4251 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4252 if (isbddc) { 4253 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4254 } 4255 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4256 coarse_reuse = PETSC_FALSE; 4257 } else { /* we can safely reuse already computed coarse matrix */ 4258 coarse_reuse = PETSC_TRUE; 4259 } 4260 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4261 coarse_reuse = PETSC_FALSE; 4262 } 4263 /* reset any subassembling information */ 4264 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4265 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4266 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4267 coarse_reuse = PETSC_TRUE; 4268 } 4269 4270 /* count "active" (i.e. with positive local size) and "void" processes */ 4271 im_active = !!(pcis->n); 4272 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4273 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4274 void_procs = all_procs-active_procs; 4275 csin_type_simple = PETSC_TRUE; 4276 redist = PETSC_FALSE; 4277 if (pcbddc->current_level && void_procs) { 4278 csin_ml = PETSC_TRUE; 4279 ncoarse_ml = void_procs; 4280 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4281 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4282 csin_ds = PETSC_TRUE; 4283 ncoarse_ds = pcbddc->redistribute_coarse; 4284 redist = PETSC_TRUE; 4285 } else { 4286 csin_ds = PETSC_TRUE; 4287 ncoarse_ds = active_procs; 4288 redist = PETSC_TRUE; 4289 } 4290 } else { 4291 csin_ml = PETSC_FALSE; 4292 ncoarse_ml = all_procs; 4293 if (void_procs) { 4294 csin_ds = PETSC_TRUE; 4295 ncoarse_ds = void_procs; 4296 csin_type_simple = PETSC_FALSE; 4297 } else { 4298 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4299 csin_ds = PETSC_TRUE; 4300 ncoarse_ds = pcbddc->redistribute_coarse; 4301 redist = PETSC_TRUE; 4302 } else { 4303 csin_ds = PETSC_FALSE; 4304 ncoarse_ds = all_procs; 4305 } 4306 } 4307 } 4308 4309 /* 4310 test if we can go multilevel: three conditions must be satisfied: 4311 - we have not exceeded the number of levels requested 4312 - we can actually subassemble the active processes 4313 - we can find a suitable number of MPI processes where we can place the subassembled problem 4314 */ 4315 multilevel_allowed = PETSC_FALSE; 4316 multilevel_requested = PETSC_FALSE; 4317 if (pcbddc->current_level < pcbddc->max_levels) { 4318 multilevel_requested = PETSC_TRUE; 4319 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4320 multilevel_allowed = PETSC_FALSE; 4321 } else { 4322 multilevel_allowed = PETSC_TRUE; 4323 } 4324 } 4325 /* determine number of process partecipating to coarse solver */ 4326 if (multilevel_allowed) { 4327 ncoarse = ncoarse_ml; 4328 csin = csin_ml; 4329 redist = PETSC_FALSE; 4330 } else { 4331 ncoarse = ncoarse_ds; 4332 csin = csin_ds; 4333 } 4334 4335 /* creates temporary l2gmap and IS for coarse indexes */ 4336 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4337 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4338 4339 /* creates temporary MATIS object for coarse matrix */ 4340 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4341 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4342 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4343 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4344 #if 0 4345 { 4346 PetscViewer viewer; 4347 char filename[256]; 4348 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4349 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4350 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4351 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4352 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4353 } 4354 #endif 4355 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4356 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4357 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4358 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4359 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4360 4361 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4362 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4363 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4364 const PetscInt *idxs; 4365 ISLocalToGlobalMapping tmap; 4366 4367 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4368 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4369 /* allocate space for temporary storage */ 4370 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4371 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4372 /* allocate for IS array */ 4373 nisdofs = pcbddc->n_ISForDofsLocal; 4374 nisneu = !!pcbddc->NeumannBoundariesLocal; 4375 nis = nisdofs + nisneu; 4376 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4377 /* dofs splitting */ 4378 for (i=0;i<nisdofs;i++) { 4379 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4380 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4381 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4382 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4383 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4384 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4385 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4386 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4387 } 4388 /* neumann boundaries */ 4389 if (pcbddc->NeumannBoundariesLocal) { 4390 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4391 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4392 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4393 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4394 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4395 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4396 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4397 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4398 } 4399 /* free memory */ 4400 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4401 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4402 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4403 } else { 4404 nis = 0; 4405 nisdofs = 0; 4406 nisneu = 0; 4407 isarray = NULL; 4408 } 4409 /* destroy no longer needed map */ 4410 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4411 4412 /* restrict on coarse candidates (if needed) */ 4413 coarse_mat_is = NULL; 4414 if (csin) { 4415 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4416 if (redist) { 4417 PetscMPIInt rank; 4418 PetscInt spc,n_spc_p1,dest[1],destsize; 4419 4420 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4421 spc = active_procs/ncoarse; 4422 n_spc_p1 = active_procs%ncoarse; 4423 if (im_active) { 4424 destsize = 1; 4425 if (rank > n_spc_p1*(spc+1)-1) { 4426 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4427 } else { 4428 dest[0] = rank/(spc+1); 4429 } 4430 } else { 4431 destsize = 0; 4432 } 4433 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4434 } else if (csin_type_simple) { 4435 PetscMPIInt rank; 4436 PetscInt issize,isidx; 4437 4438 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4439 if (im_active) { 4440 issize = 1; 4441 isidx = (PetscInt)rank; 4442 } else { 4443 issize = 0; 4444 isidx = -1; 4445 } 4446 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4447 } else { /* get a suitable subassembling pattern from MATIS code */ 4448 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4449 } 4450 4451 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4452 if (!redist || ncoarse <= void_procs) { 4453 PetscInt ncoarse_cand,tissize,*nisindices; 4454 PetscInt *coarse_candidates; 4455 const PetscInt* tisindices; 4456 4457 /* get coarse candidates' ranks in pc communicator */ 4458 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4459 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4460 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4461 if (!coarse_candidates[i]) { 4462 coarse_candidates[ncoarse_cand++]=i; 4463 } 4464 } 4465 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4466 4467 4468 if (pcbddc->dbg_flag) { 4469 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4470 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4471 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4472 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4473 for (i=0;i<ncoarse_cand;i++) { 4474 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4475 } 4476 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4477 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4478 } 4479 /* shift the pattern on coarse candidates */ 4480 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4481 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4482 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4483 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4484 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4485 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4486 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4487 } 4488 if (pcbddc->dbg_flag) { 4489 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4490 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4491 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4492 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4493 } 4494 } 4495 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4496 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4497 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4498 } else { /* this is the last level, so use just receiving processes in subcomm */ 4499 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4500 } 4501 } else { 4502 if (pcbddc->dbg_flag) { 4503 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4504 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4505 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4506 } 4507 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4508 coarse_mat_is = t_coarse_mat_is; 4509 } 4510 4511 /* create local to global scatters for coarse problem */ 4512 if (compute_vecs) { 4513 PetscInt lrows; 4514 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4515 if (coarse_mat_is) { 4516 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4517 } else { 4518 lrows = 0; 4519 } 4520 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4521 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4522 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4523 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4524 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4525 } 4526 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4527 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4528 4529 /* set defaults for coarse KSP and PC */ 4530 if (multilevel_allowed) { 4531 coarse_ksp_type = KSPRICHARDSON; 4532 coarse_pc_type = PCBDDC; 4533 } else { 4534 coarse_ksp_type = KSPPREONLY; 4535 coarse_pc_type = PCREDUNDANT; 4536 } 4537 4538 /* print some info if requested */ 4539 if (pcbddc->dbg_flag) { 4540 if (!multilevel_allowed) { 4541 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4542 if (multilevel_requested) { 4543 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4544 } else if (pcbddc->max_levels) { 4545 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4546 } 4547 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4548 } 4549 } 4550 4551 /* create the coarse KSP object only once with defaults */ 4552 if (coarse_mat_is) { 4553 MatReuse coarse_mat_reuse; 4554 PetscViewer dbg_viewer = NULL; 4555 if (pcbddc->dbg_flag) { 4556 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4557 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4558 } 4559 if (!pcbddc->coarse_ksp) { 4560 char prefix[256],str_level[16]; 4561 size_t len; 4562 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4563 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4564 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4565 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4566 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4567 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4568 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4569 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4570 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4571 /* prefix */ 4572 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4573 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4574 if (!pcbddc->current_level) { 4575 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4576 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4577 } else { 4578 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4579 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4580 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4581 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4582 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4583 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4584 } 4585 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4586 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4587 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4588 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4589 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4590 /* allow user customization */ 4591 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4592 } 4593 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4594 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4595 if (nisdofs) { 4596 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4597 for (i=0;i<nisdofs;i++) { 4598 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4599 } 4600 } 4601 if (nisneu) { 4602 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4603 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4604 } 4605 4606 /* get some info after set from options */ 4607 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4608 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4609 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4610 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4611 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4612 isbddc = PETSC_FALSE; 4613 } 4614 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4615 if (isredundant) { 4616 KSP inner_ksp; 4617 PC inner_pc; 4618 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4619 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4620 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4621 } 4622 4623 /* assemble coarse matrix */ 4624 if (coarse_reuse) { 4625 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4626 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4627 coarse_mat_reuse = MAT_REUSE_MATRIX; 4628 } else { 4629 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4630 } 4631 if (isbddc || isnn) { 4632 if (pcbddc->coarsening_ratio > 1) { 4633 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4634 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4635 if (pcbddc->dbg_flag) { 4636 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4637 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4638 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4639 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4640 } 4641 } 4642 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4643 } else { 4644 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4645 coarse_mat = coarse_mat_is; 4646 } 4647 } else { 4648 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4649 } 4650 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4651 4652 /* propagate symmetry info of coarse matrix */ 4653 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4654 if (pc->pmat->symmetric_set) { 4655 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4656 } 4657 if (pc->pmat->hermitian_set) { 4658 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4659 } 4660 if (pc->pmat->spd_set) { 4661 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4662 } 4663 /* set operators */ 4664 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4665 if (pcbddc->dbg_flag) { 4666 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4667 } 4668 } else { /* processes non partecipating to coarse solver (if any) */ 4669 coarse_mat = 0; 4670 } 4671 ierr = PetscFree(isarray);CHKERRQ(ierr); 4672 #if 0 4673 { 4674 PetscViewer viewer; 4675 char filename[256]; 4676 sprintf(filename,"coarse_mat.m"); 4677 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4678 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4679 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4680 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4681 } 4682 #endif 4683 4684 /* Compute coarse null space (special handling by BDDC only) */ 4685 #if 0 4686 if (pcbddc->NullSpace) { 4687 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4688 } 4689 #endif 4690 4691 if (pcbddc->coarse_ksp) { 4692 Vec crhs,csol; 4693 PetscBool ispreonly; 4694 4695 if (CoarseNullSpace) { 4696 if (isbddc) { 4697 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4698 } else { 4699 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4700 } 4701 } 4702 /* setup coarse ksp */ 4703 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4704 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4705 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4706 /* hack */ 4707 if (!csol) { 4708 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4709 } 4710 if (!crhs) { 4711 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4712 } 4713 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4714 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4715 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4716 KSP check_ksp; 4717 KSPType check_ksp_type; 4718 PC check_pc; 4719 Vec check_vec,coarse_vec; 4720 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4721 PetscInt its; 4722 PetscBool compute_eigs; 4723 PetscReal *eigs_r,*eigs_c; 4724 PetscInt neigs; 4725 const char *prefix; 4726 4727 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4728 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4729 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4730 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4731 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4732 if (ispreonly) { 4733 check_ksp_type = KSPPREONLY; 4734 compute_eigs = PETSC_FALSE; 4735 } else { 4736 check_ksp_type = KSPGMRES; 4737 compute_eigs = PETSC_TRUE; 4738 } 4739 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4740 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4741 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4742 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4743 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4744 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4745 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4746 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4747 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4748 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4749 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4750 /* create random vec */ 4751 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4752 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4753 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4754 if (CoarseNullSpace) { 4755 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4756 } 4757 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4758 /* solve coarse problem */ 4759 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4760 if (CoarseNullSpace) { 4761 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4762 } 4763 /* set eigenvalue estimation if preonly has not been requested */ 4764 if (compute_eigs) { 4765 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4766 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4767 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4768 lambda_max = eigs_r[neigs-1]; 4769 lambda_min = eigs_r[0]; 4770 if (pcbddc->use_coarse_estimates) { 4771 if (lambda_max>lambda_min) { 4772 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4773 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4774 } 4775 } 4776 } 4777 4778 /* check coarse problem residual error */ 4779 if (pcbddc->dbg_flag) { 4780 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4781 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4782 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4783 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4784 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4785 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4786 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4787 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4788 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4789 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4790 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4791 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4792 if (compute_eigs) { 4793 PetscReal lambda_max_s,lambda_min_s; 4794 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4795 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4796 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4797 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4798 for (i=0;i<neigs;i++) { 4799 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4800 } 4801 } 4802 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4803 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4804 } 4805 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4806 if (compute_eigs) { 4807 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4808 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4809 } 4810 } 4811 } 4812 /* print additional info */ 4813 if (pcbddc->dbg_flag) { 4814 /* waits until all processes reaches this point */ 4815 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4816 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4817 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4818 } 4819 4820 /* free memory */ 4821 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4822 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4823 PetscFunctionReturn(0); 4824 } 4825 4826 #undef __FUNCT__ 4827 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4828 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4829 { 4830 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4831 PC_IS* pcis = (PC_IS*)pc->data; 4832 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4833 IS subset,subset_mult,subset_n; 4834 PetscInt local_size,coarse_size=0; 4835 PetscInt *local_primal_indices=NULL; 4836 const PetscInt *t_local_primal_indices; 4837 PetscErrorCode ierr; 4838 4839 PetscFunctionBegin; 4840 /* Compute global number of coarse dofs */ 4841 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4842 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4843 } 4844 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4845 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4846 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4847 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4848 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4849 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4850 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4851 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4852 if (local_size != pcbddc->local_primal_size) { 4853 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4854 } 4855 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4856 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4857 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4858 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4859 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4860 4861 /* check numbering */ 4862 if (pcbddc->dbg_flag) { 4863 PetscScalar coarsesum,*array,*array2; 4864 PetscInt i; 4865 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4866 4867 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4868 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4870 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4871 /* counter */ 4872 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4873 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4874 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4875 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4876 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4877 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4878 4879 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4880 for (i=0;i<pcbddc->local_primal_size;i++) { 4881 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4882 } 4883 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4884 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4885 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4886 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4887 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4888 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4889 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4890 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4891 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4892 for (i=0;i<pcis->n;i++) { 4893 if (array[i] != 0.0 && array[i] != array2[i]) { 4894 PetscInt owned = (PetscInt)(array[i]); 4895 PetscInt neigh = (PetscInt)(array2[i]); 4896 set_error = PETSC_TRUE; 4897 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4898 } 4899 } 4900 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4901 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4902 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4903 for (i=0;i<pcis->n;i++) { 4904 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4905 } 4906 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4907 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4908 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4909 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4910 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4911 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4912 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4913 PetscInt *gidxs; 4914 4915 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4916 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4917 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4918 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4919 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4920 for (i=0;i<pcbddc->local_primal_size;i++) { 4921 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4922 } 4923 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4924 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4925 } 4926 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4927 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4928 } 4929 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4930 /* get back data */ 4931 *coarse_size_n = coarse_size; 4932 *local_primal_indices_n = local_primal_indices; 4933 PetscFunctionReturn(0); 4934 } 4935 4936 #undef __FUNCT__ 4937 #define __FUNCT__ "PCBDDCGlobalToLocal" 4938 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4939 { 4940 IS localis_t; 4941 PetscInt i,lsize,*idxs,n; 4942 PetscScalar *vals; 4943 PetscErrorCode ierr; 4944 4945 PetscFunctionBegin; 4946 /* get indices in local ordering exploiting local to global map */ 4947 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4948 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4949 for (i=0;i<lsize;i++) vals[i] = 1.0; 4950 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4951 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4952 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4953 if (idxs) { /* multilevel guard */ 4954 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4955 } 4956 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4957 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4958 ierr = PetscFree(vals);CHKERRQ(ierr); 4959 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4960 /* now compute set in local ordering */ 4961 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4962 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4963 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4964 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4965 for (i=0,lsize=0;i<n;i++) { 4966 if (PetscRealPart(vals[i]) > 0.5) { 4967 lsize++; 4968 } 4969 } 4970 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4971 for (i=0,lsize=0;i<n;i++) { 4972 if (PetscRealPart(vals[i]) > 0.5) { 4973 idxs[lsize++] = i; 4974 } 4975 } 4976 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4977 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4978 *localis = localis_t; 4979 PetscFunctionReturn(0); 4980 } 4981 4982 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4983 #undef __FUNCT__ 4984 #define __FUNCT__ "PCBDDCMatMult_Private" 4985 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4986 { 4987 PCBDDCChange_ctx change_ctx; 4988 PetscErrorCode ierr; 4989 4990 PetscFunctionBegin; 4991 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4992 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4993 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4994 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4995 PetscFunctionReturn(0); 4996 } 4997 4998 #undef __FUNCT__ 4999 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5000 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5001 { 5002 PCBDDCChange_ctx change_ctx; 5003 PetscErrorCode ierr; 5004 5005 PetscFunctionBegin; 5006 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5007 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5008 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5009 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5010 PetscFunctionReturn(0); 5011 } 5012 5013 #undef __FUNCT__ 5014 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5015 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5016 { 5017 PC_IS *pcis=(PC_IS*)pc->data; 5018 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5019 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5020 Mat S_j; 5021 PetscInt *used_xadj,*used_adjncy; 5022 PetscBool free_used_adj; 5023 PetscErrorCode ierr; 5024 5025 PetscFunctionBegin; 5026 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5027 free_used_adj = PETSC_FALSE; 5028 if (pcbddc->sub_schurs_layers == -1) { 5029 used_xadj = NULL; 5030 used_adjncy = NULL; 5031 } else { 5032 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5033 used_xadj = pcbddc->mat_graph->xadj; 5034 used_adjncy = pcbddc->mat_graph->adjncy; 5035 } else if (pcbddc->computed_rowadj) { 5036 used_xadj = pcbddc->mat_graph->xadj; 5037 used_adjncy = pcbddc->mat_graph->adjncy; 5038 } else { 5039 PetscBool flg_row=PETSC_FALSE; 5040 const PetscInt *xadj,*adjncy; 5041 PetscInt nvtxs; 5042 5043 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5044 if (flg_row) { 5045 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5046 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5047 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5048 free_used_adj = PETSC_TRUE; 5049 } else { 5050 pcbddc->sub_schurs_layers = -1; 5051 used_xadj = NULL; 5052 used_adjncy = NULL; 5053 } 5054 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5055 } 5056 } 5057 5058 /* setup sub_schurs data */ 5059 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5060 if (!sub_schurs->use_mumps) { 5061 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5062 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5063 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5064 } else { 5065 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5066 PetscBool isseqaij; 5067 if (!pcbddc->use_vertices && reuse_solvers) { 5068 PetscInt n_vertices; 5069 5070 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5071 reuse_solvers = (PetscBool)!n_vertices; 5072 } 5073 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5074 if (!isseqaij) { 5075 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5076 if (matis->A == pcbddc->local_mat) { 5077 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5078 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5079 } else { 5080 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5081 } 5082 } 5083 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5084 } 5085 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5086 5087 /* free adjacency */ 5088 if (free_used_adj) { 5089 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5090 } 5091 PetscFunctionReturn(0); 5092 } 5093 5094 #undef __FUNCT__ 5095 #define __FUNCT__ "PCBDDCInitSubSchurs" 5096 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5097 { 5098 PC_IS *pcis=(PC_IS*)pc->data; 5099 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5100 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5101 PCBDDCGraph graph; 5102 PetscErrorCode ierr; 5103 5104 PetscFunctionBegin; 5105 /* attach interface graph for determining subsets */ 5106 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5107 IS verticesIS,verticescomm; 5108 PetscInt vsize,*idxs; 5109 5110 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5111 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5112 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5113 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5114 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5115 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5116 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5117 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5118 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5119 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5120 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5121 /* 5122 if (pcbddc->dbg_flag) { 5123 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5124 } 5125 */ 5126 } else { 5127 graph = pcbddc->mat_graph; 5128 } 5129 5130 /* sub_schurs init */ 5131 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5132 5133 /* free graph struct */ 5134 if (pcbddc->sub_schurs_rebuild) { 5135 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5136 } 5137 PetscFunctionReturn(0); 5138 } 5139