1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0" 10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 if (!pcbddc->benign_sf) { 17 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 18 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 19 } 20 if (pop) { /* use SF to get values */ 21 PetscScalar *array; 22 23 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 24 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 25 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 26 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 27 } else { /* use VecSetValue */ 28 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 29 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 30 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNCT__ 36 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 37 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 38 { 39 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 40 PetscErrorCode ierr; 41 42 PetscFunctionBegin; 43 /* TODO: add error checking 44 - avoid nested pop (or push) calls. 45 - cannot push before pop. 46 */ 47 if (!pcbddc->zerodiag) { 48 PetscFunctionReturn(0); 49 } 50 if (pop) { 51 const PetscInt *cB0_cols; 52 PetscInt cB0_ncol; 53 const PetscScalar *cB0_vals; 54 55 /* extract B_0 */ 56 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 57 pcbddc->B0_ncol = cB0_ncol; 58 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 59 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 60 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 61 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 62 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 63 /* remove rows and cols from local problem */ 64 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 65 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 66 } else { /* push */ 67 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 68 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 69 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 70 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 71 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 72 } 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "PCBDDCAdaptiveSelection" 78 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 79 { 80 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 81 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 82 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 83 PetscBLASInt *B_iwork,*B_ifail; 84 PetscScalar *work,lwork; 85 PetscScalar *St,*S,*eigv; 86 PetscScalar *Sarray,*Starray; 87 PetscReal *eigs,thresh; 88 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 89 PetscBool allocated_S_St; 90 #if defined(PETSC_USE_COMPLEX) 91 PetscReal *rwork; 92 #endif 93 PetscErrorCode ierr; 94 95 PetscFunctionBegin; 96 if (!sub_schurs->use_mumps) { 97 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 98 } 99 100 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 101 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 102 } 103 104 if (pcbddc->dbg_flag) { 105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 106 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 107 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 108 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 109 } 110 111 if (pcbddc->dbg_flag) { 112 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 113 } 114 115 /* max size of subsets */ 116 mss = 0; 117 for (i=0;i<sub_schurs->n_subs;i++) { 118 PetscInt subset_size; 119 120 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 121 mss = PetscMax(mss,subset_size); 122 } 123 124 /* min/max and threshold */ 125 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 126 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 127 nmax = PetscMax(nmin,nmax); 128 allocated_S_St = PETSC_FALSE; 129 if (nmin) { 130 allocated_S_St = PETSC_TRUE; 131 } 132 133 /* allocate lapack workspace */ 134 cum = cum2 = 0; 135 maxneigs = 0; 136 for (i=0;i<sub_schurs->n_subs;i++) { 137 PetscInt n,subset_size; 138 139 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 140 n = PetscMin(subset_size,nmax); 141 cum += subset_size; 142 cum2 += subset_size*n; 143 maxneigs = PetscMax(maxneigs,n); 144 } 145 if (mss) { 146 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 147 PetscBLASInt B_itype = 1; 148 PetscBLASInt B_N = mss; 149 PetscReal zero = 0.0; 150 PetscReal eps = 0.0; /* dlamch? */ 151 152 B_lwork = -1; 153 S = NULL; 154 St = NULL; 155 eigs = NULL; 156 eigv = NULL; 157 B_iwork = NULL; 158 B_ifail = NULL; 159 #if defined(PETSC_USE_COMPLEX) 160 rwork = NULL; 161 #endif 162 thresh = 1.0; 163 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 164 #if defined(PETSC_USE_COMPLEX) 165 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 166 #else 167 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 168 #endif 169 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 170 ierr = PetscFPTrapPop();CHKERRQ(ierr); 171 } else { 172 /* TODO */ 173 } 174 } else { 175 lwork = 0; 176 } 177 178 nv = 0; 179 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 180 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 181 } 182 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 183 if (allocated_S_St) { 184 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 185 } 186 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 187 #if defined(PETSC_USE_COMPLEX) 188 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 189 #endif 190 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 191 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 192 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 193 nv+cum,&pcbddc->adaptive_constraints_idxs, 194 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 195 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 196 197 maxneigs = 0; 198 cum = cum2 = cumarray = 0; 199 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 200 pcbddc->adaptive_constraints_data_ptr[0] = 0; 201 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 202 const PetscInt *idxs; 203 204 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 205 for (cum=0;cum<nv;cum++) { 206 pcbddc->adaptive_constraints_n[cum] = 1; 207 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 208 pcbddc->adaptive_constraints_data[cum] = 1.0; 209 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 210 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 211 } 212 cum2 = cum; 213 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 214 } 215 216 if (mss) { /* multilevel */ 217 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 218 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 219 } 220 221 for (i=0;i<sub_schurs->n_subs;i++) { 222 223 const PetscInt *idxs; 224 PetscReal infty = PETSC_MAX_REAL; 225 PetscInt j,subset_size,eigs_start = 0; 226 PetscBLASInt B_N; 227 PetscBool same_data = PETSC_FALSE; 228 229 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 230 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 231 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 232 if (sub_schurs->is_hermitian) { 233 PetscInt j,k; 234 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 235 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 236 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 for (j=0;j<subset_size;j++) { 239 for (k=j;k<subset_size;k++) { 240 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 241 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 242 } 243 } 244 } else { 245 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 246 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 247 } 248 } else { 249 S = Sarray + cumarray; 250 St = Starray + cumarray; 251 } 252 253 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 254 /* see if we can save some work */ 255 if (sub_schurs->n_subs == 1) { 256 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 257 } 258 259 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 260 B_neigs = 0; 261 } else { 262 /* Threshold: this is an heuristic for edges */ 263 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 264 265 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 266 PetscBLASInt B_itype = 1; 267 PetscBLASInt B_IL, B_IU; 268 PetscReal eps = -1.0; /* dlamch? */ 269 PetscInt nmin_s; 270 271 /* ask for eigenvalues larger than thresh */ 272 if (pcbddc->dbg_flag) { 273 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 274 } 275 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 276 #if defined(PETSC_USE_COMPLEX) 277 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 278 #else 279 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 280 #endif 281 ierr = PetscFPTrapPop();CHKERRQ(ierr); 282 if (B_ierr) { 283 if (B_ierr < 0 ) { 284 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 285 } else if (B_ierr <= B_N) { 286 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 287 } else { 288 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 289 } 290 } 291 292 if (B_neigs > nmax) { 293 if (pcbddc->dbg_flag) { 294 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 295 } 296 eigs_start = B_neigs -nmax; 297 B_neigs = nmax; 298 } 299 300 nmin_s = PetscMin(nmin,B_N); 301 if (B_neigs < nmin_s) { 302 PetscBLASInt B_neigs2; 303 304 B_IU = B_N - B_neigs; 305 B_IL = B_N - nmin_s + 1; 306 if (pcbddc->dbg_flag) { 307 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 308 } 309 if (sub_schurs->is_hermitian) { 310 PetscInt j; 311 for (j=0;j<subset_size;j++) { 312 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 313 } 314 for (j=0;j<subset_size;j++) { 315 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 316 } 317 } else { 318 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 319 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 320 } 321 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 322 #if defined(PETSC_USE_COMPLEX) 323 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 324 #else 325 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 326 #endif 327 ierr = PetscFPTrapPop();CHKERRQ(ierr); 328 B_neigs += B_neigs2; 329 } 330 if (B_ierr) { 331 if (B_ierr < 0 ) { 332 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 333 } else if (B_ierr <= B_N) { 334 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 335 } else { 336 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 337 } 338 } 339 if (pcbddc->dbg_flag) { 340 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 341 for (j=0;j<B_neigs;j++) { 342 if (eigs[j] == 0.0) { 343 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 344 } else { 345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 346 } 347 } 348 } 349 } else { 350 /* TODO */ 351 } 352 } 353 maxneigs = PetscMax(B_neigs,maxneigs); 354 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 355 if (B_neigs) { 356 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 357 358 if (pcbddc->dbg_flag > 1) { 359 PetscInt ii; 360 for (ii=0;ii<B_neigs;ii++) { 361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 362 for (j=0;j<B_N;j++) { 363 #if defined(PETSC_USE_COMPLEX) 364 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 365 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 366 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 367 #else 368 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 369 #endif 370 } 371 } 372 } 373 #if 0 374 for (j=0;j<B_neigs;j++) { 375 PetscBLASInt Blas_N,Blas_one = 1.0; 376 PetscScalar norm; 377 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 378 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 379 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 380 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 381 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 382 } else { 383 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 384 } 385 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 386 } 387 #endif 388 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 389 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 390 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 391 cum++; 392 } 393 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 394 /* shift for next computation */ 395 cumarray += subset_size*subset_size; 396 } 397 if (pcbddc->dbg_flag) { 398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 399 } 400 401 if (mss) { 402 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 403 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 404 /* destroy matrices (junk) */ 405 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 406 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 407 } 408 if (allocated_S_St) { 409 ierr = PetscFree2(S,St);CHKERRQ(ierr); 410 } 411 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 412 #if defined(PETSC_USE_COMPLEX) 413 ierr = PetscFree(rwork);CHKERRQ(ierr); 414 #endif 415 if (pcbddc->dbg_flag) { 416 PetscInt maxneigs_r; 417 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 418 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 419 } 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "PCBDDCSetUpSolvers" 425 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 426 { 427 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 428 PetscScalar *coarse_submat_vals; 429 PetscErrorCode ierr; 430 431 PetscFunctionBegin; 432 /* Setup local scatters R_to_B and (optionally) R_to_D */ 433 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 434 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 435 436 /* Setup local neumann solver ksp_R */ 437 /* PCBDDCSetUpLocalScatters should be called first! */ 438 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 439 440 /* Change global null space passed in by the user if change of basis has been requested */ 441 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 442 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 443 } 444 445 /* 446 Setup local correction and local part of coarse basis. 447 Gives back the dense local part of the coarse matrix in column major ordering 448 */ 449 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 450 451 /* Compute total number of coarse nodes and setup coarse solver */ 452 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 453 454 /* free */ 455 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "PCBDDCResetCustomization" 461 PetscErrorCode PCBDDCResetCustomization(PC pc) 462 { 463 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 464 PetscErrorCode ierr; 465 466 PetscFunctionBegin; 467 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 468 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 469 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 470 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 471 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 472 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 473 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 474 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 475 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 476 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 477 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNCT__ 482 #define __FUNCT__ "PCBDDCResetTopography" 483 PetscErrorCode PCBDDCResetTopography(PC pc) 484 { 485 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 490 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 491 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 492 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 493 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "PCBDDCResetSolvers" 499 PetscErrorCode PCBDDCResetSolvers(PC pc) 500 { 501 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 502 PetscScalar *array; 503 PetscErrorCode ierr; 504 505 PetscFunctionBegin; 506 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 507 if (pcbddc->coarse_phi_B) { 508 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 509 ierr = PetscFree(array);CHKERRQ(ierr); 510 } 511 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 512 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 513 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 514 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 515 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 516 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 517 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 518 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 519 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 520 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 521 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 522 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 523 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 524 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 525 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 526 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 527 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 528 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 529 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 530 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 531 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 532 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 533 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 534 ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr); 535 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 536 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 542 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 543 { 544 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 545 PC_IS *pcis = (PC_IS*)pc->data; 546 VecType impVecType; 547 PetscInt n_constraints,n_R,old_size,n_benign; 548 PetscErrorCode ierr; 549 550 PetscFunctionBegin; 551 if (!pcbddc->ConstraintMatrix) { 552 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 553 } 554 /* get sizes */ 555 n_benign = (PetscInt)(!!pcbddc->zerodiag); 556 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 557 if (n_benign && n_constraints) { 558 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis"); 559 } 560 n_R = pcis->n - pcbddc->n_vertices; 561 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 562 /* local work vectors (try to avoid unneeded work)*/ 563 /* R nodes */ 564 old_size = -1; 565 if (pcbddc->vec1_R) { 566 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 567 } 568 if (n_R != old_size) { 569 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 570 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 571 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 572 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 573 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 574 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 575 } 576 /* local primal dofs */ 577 old_size = -1; 578 if (pcbddc->vec1_P) { 579 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 580 } 581 if (pcbddc->local_primal_size != old_size) { 582 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 583 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 584 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 585 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 586 } 587 /* local explicit constraints */ 588 old_size = -1; 589 if (pcbddc->vec1_C) { 590 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 591 } 592 if (n_constraints && n_constraints != old_size) { 593 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 594 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 595 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 596 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 597 } 598 PetscFunctionReturn(0); 599 } 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "PCBDDCSetUpCorrection" 603 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 604 { 605 PetscErrorCode ierr; 606 /* pointers to pcis and pcbddc */ 607 PC_IS* pcis = (PC_IS*)pc->data; 608 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 609 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 610 /* submatrices of local problem */ 611 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 612 /* submatrices of benign trick */ 613 Mat B0_R,B0_V; 614 /* submatrices of local coarse problem */ 615 Mat S_VV,S_CV,S_VC,S_CC; 616 /* working matrices */ 617 Mat C_CR; 618 /* additional working stuff */ 619 PC pc_R; 620 Mat F; 621 Vec vec_R_benign; 622 PetscBool isLU,isCHOL,isILU; 623 624 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 625 PetscScalar *work; 626 PetscInt *idx_V_B; 627 PetscInt n,n_vertices,n_constraints,n_benign; 628 PetscInt i,n_R,n_D,n_B; 629 PetscBool unsymmetric_check; 630 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 631 MatType impMatType; 632 /* some shortcuts to scalars */ 633 PetscScalar one=1.0,m_one=-1.0; 634 635 PetscFunctionBegin; 636 n_benign = (PetscInt)(!!pcbddc->zerodiag); 637 n_vertices = pcbddc->n_vertices; 638 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 639 /* Set Non-overlapping dimensions */ 640 n_B = pcis->n_B; 641 n_D = pcis->n - n_B; 642 n_R = pcis->n - n_vertices; 643 644 /* Set types for local objects needed by BDDC precondtioner */ 645 impMatType = MATSEQDENSE; 646 647 /* vertices in boundary numbering */ 648 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 649 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 650 if (i != n_vertices) { 651 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 652 } 653 654 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 655 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 656 if (!n_benign) { 657 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 658 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 659 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 660 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 662 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 663 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 664 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 665 } else { /* these wrappers are not needed with the benign trick */ 666 S_VV = S_CV = S_VC = S_CC = NULL; 667 } 668 669 unsymmetric_check = PETSC_FALSE; 670 /* allocate workspace */ 671 n = 0; 672 if (n_constraints) { 673 n += n_R*n_constraints; 674 } 675 if (n_vertices) { 676 n = PetscMax(2*n_R*n_vertices,n); 677 n = PetscMax((n_R+n_B)*n_vertices,n); 678 } 679 if (!pcbddc->symmetric_primal) { 680 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 681 unsymmetric_check = PETSC_TRUE; 682 } 683 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 684 685 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 686 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 687 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 688 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 689 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 690 if (isLU || isILU || isCHOL) { 691 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 692 } else if (sub_schurs->reuse_mumps) { 693 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 694 MatFactorType type; 695 696 F = reuse_mumps->F; 697 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 698 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 699 } else { 700 F = NULL; 701 } 702 703 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 704 if (n_constraints) { 705 Mat M1,M2,M3; 706 Mat auxmat; 707 IS is_aux; 708 PetscScalar *array,*array2; 709 710 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 711 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 712 713 /* Extract constraints on R nodes: C_{CR} */ 714 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 715 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 716 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 717 718 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 719 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 720 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 721 for (i=0;i<n_constraints;i++) { 722 const PetscScalar *row_cmat_values; 723 const PetscInt *row_cmat_indices; 724 PetscInt size_of_constraint,j; 725 726 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 727 for (j=0;j<size_of_constraint;j++) { 728 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 729 } 730 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 731 } 732 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 733 if (F) { 734 Mat B; 735 736 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 737 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 738 ierr = MatDestroy(&B);CHKERRQ(ierr); 739 } else { 740 PetscScalar *marr; 741 742 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 743 for (i=0;i<n_constraints;i++) { 744 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 745 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 746 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 747 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 748 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 749 } 750 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 751 } 752 if (!pcbddc->switch_static) { 753 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 754 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 755 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 756 for (i=0;i<n_constraints;i++) { 757 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 758 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 759 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 760 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 761 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 762 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 763 } 764 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 765 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 766 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 767 } else { 768 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 769 pcbddc->local_auxmat2 = local_auxmat2_R; 770 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 771 } 772 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 773 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 774 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 775 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 776 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 777 if (isCHOL) { 778 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 779 } else { 780 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 781 } 782 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 783 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 784 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 785 ierr = MatDestroy(&M2);CHKERRQ(ierr); 786 ierr = MatDestroy(&M3);CHKERRQ(ierr); 787 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 788 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 789 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 790 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 791 ierr = MatDestroy(&M1);CHKERRQ(ierr); 792 } 793 /* Get submatrices from subdomain matrix */ 794 if (n_vertices) { 795 IS is_aux; 796 797 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 798 IS tis; 799 800 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 801 ierr = ISSort(tis);CHKERRQ(ierr); 802 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 803 ierr = ISDestroy(&tis);CHKERRQ(ierr); 804 } else { 805 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 806 } 807 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 808 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 809 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 810 if (n_benign) { 811 Mat B0; 812 IS dummy; 813 PetscInt ii[2]; 814 815 ii[0] = 0; 816 ii[1] = pcbddc->B0_ncol; 817 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 818 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 819 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 820 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 821 ierr = MatDestroy(&B0);CHKERRQ(ierr); 822 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 823 } 824 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 825 } 826 827 /* Matrix of coarse basis functions (local) */ 828 if (pcbddc->coarse_phi_B) { 829 PetscInt on_B,on_primal,on_D=n_D; 830 if (pcbddc->coarse_phi_D) { 831 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 832 } 833 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 834 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 835 PetscScalar *marray; 836 837 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 838 ierr = PetscFree(marray);CHKERRQ(ierr); 839 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 840 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 841 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 842 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 843 } 844 } 845 846 if (!pcbddc->coarse_phi_B) { 847 PetscScalar *marray; 848 849 n = n_B*pcbddc->local_primal_size; 850 if (pcbddc->switch_static || pcbddc->dbg_flag) { 851 n += n_D*pcbddc->local_primal_size; 852 } 853 if (!pcbddc->symmetric_primal) { 854 n *= 2; 855 } 856 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 857 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 858 n = n_B*pcbddc->local_primal_size; 859 if (pcbddc->switch_static || pcbddc->dbg_flag) { 860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 861 n += n_D*pcbddc->local_primal_size; 862 } 863 if (!pcbddc->symmetric_primal) { 864 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 865 if (pcbddc->switch_static || pcbddc->dbg_flag) { 866 n = n_B*pcbddc->local_primal_size; 867 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 868 } 869 } else { 870 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 871 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 872 if (pcbddc->switch_static || pcbddc->dbg_flag) { 873 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 874 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 875 } 876 } 877 } 878 879 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 880 /* vertices */ 881 if (n_vertices) { 882 883 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 884 885 if (n_R) { 886 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 887 Mat S_V_benign; 888 PetscBLASInt B_N,B_one = 1; 889 PetscScalar *x,*y; 890 PetscBool isseqaij; 891 892 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 893 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 894 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 895 if (F) { /* TODO could be optimized for symmetric problems */ 896 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 897 } else { 898 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 899 for (i=0;i<n_vertices;i++) { 900 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 901 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 902 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 903 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 904 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 905 } 906 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 907 } 908 if (n_benign) { 909 const PetscScalar *vals; 910 const PetscInt *idxs; 911 PetscInt n; 912 913 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr); 914 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 915 ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 916 ierr = VecSetValues(pcbddc->vec1_R,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 917 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 918 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 919 ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 920 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,vec_R_benign);CHKERRQ(ierr); 921 ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr); 922 } 923 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 924 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 925 if (n_constraints) { 926 Mat B; 927 928 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 929 for (i=0;i<n_vertices;i++) { 930 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 931 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 932 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 933 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 934 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 935 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 936 } 937 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 938 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 939 ierr = MatDestroy(&B);CHKERRQ(ierr); 940 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 941 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 942 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 943 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 944 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 945 ierr = MatDestroy(&B);CHKERRQ(ierr); 946 } 947 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 948 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 949 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 950 } 951 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 952 if (n_benign) { 953 ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr); 954 } 955 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 956 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 957 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 958 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 959 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 960 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 961 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 962 if (!n_benign) { 963 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 964 } else { 965 const PetscScalar *vals; 966 PetscScalar val; 967 const PetscInt *idxs; 968 PetscInt n,j; 969 970 /* Use explicit loops to populate the local coarse matrix */ 971 ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr); 972 for (j=0;j<n_vertices;j++) { 973 for (i=0;i<n_vertices;i++) { 974 coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i]; 975 } 976 } 977 ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr); 978 ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr); 979 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 980 for (j=0;j<n_vertices;j++) { 981 coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j]; 982 coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j]; 983 } 984 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 985 ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr); 986 ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr); 987 ierr = VecDot(vec_R_benign,vec_R_benign,&val);CHKERRQ(ierr); 988 coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = -val; 989 } 990 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 991 } else { 992 /* TODO benign case also here */ 993 if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented"); 994 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 995 } 996 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 997 /* coarse basis functions */ 998 for (i=0;i<n_vertices;i++) { 999 PetscScalar *y; 1000 1001 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1002 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1003 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1004 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 y[n_B*i+idx_V_B[i]] = 1.0; 1007 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1008 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1009 1010 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1011 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1012 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1013 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1015 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1016 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1017 } 1018 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1019 } 1020 if (n_benign) { 1021 PetscScalar *y; 1022 1023 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1024 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr); 1025 ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1028 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1029 1030 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1031 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1032 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1033 ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1036 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1037 } 1038 } 1039 /* if n_R == 0 the object is not destroyed */ 1040 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1041 } 1042 1043 if (n_constraints) { 1044 Mat B; 1045 1046 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1047 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1048 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1049 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1050 if (n_vertices) { 1051 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1052 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1053 } else { 1054 Mat S_VCt; 1055 1056 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1057 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1058 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1059 } 1060 } 1061 ierr = MatDestroy(&B);CHKERRQ(ierr); 1062 /* coarse basis functions */ 1063 for (i=0;i<n_constraints;i++) { 1064 PetscScalar *y; 1065 1066 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1067 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1068 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1069 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1071 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1072 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1073 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1074 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1075 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1076 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1077 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1078 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1079 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1080 } 1081 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1082 } 1083 } 1084 if (n_constraints) { 1085 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1086 } 1087 1088 if (n_benign) { 1089 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1090 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1091 ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr); 1092 } 1093 1094 /* compute other basis functions for non-symmetric problems */ 1095 if (!pcbddc->symmetric_primal) { 1096 1097 if (n_constraints) { 1098 Mat S_CCT,B_C; 1099 1100 /* this is a lazy thing */ 1101 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1103 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1104 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1105 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1106 if (n_vertices) { 1107 Mat B_V,S_VCT; 1108 1109 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1110 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1111 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1112 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1113 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1114 } 1115 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1116 } else { /* if there are no constraints, reset work */ 1117 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1118 } 1119 if (n_vertices && n_R) { 1120 Mat A_VRT; 1121 PetscScalar *marray; 1122 PetscBLASInt B_N,B_one = 1; 1123 1124 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1125 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1126 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1127 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1128 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1129 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1130 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1131 } 1132 1133 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1134 for (i=0;i<pcbddc->local_primal_size;i++) { 1135 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1136 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1137 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1138 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1139 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1140 } 1141 } else { 1142 for (i=0;i<pcbddc->local_primal_size;i++) { 1143 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1144 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1145 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1146 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1147 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1148 } 1149 } 1150 /* coarse basis functions */ 1151 for (i=0;i<pcbddc->local_primal_size;i++) { 1152 PetscScalar *y; 1153 1154 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1155 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1156 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1157 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1158 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1159 if (i<n_vertices) { 1160 y[n_B*i+idx_V_B[i]] = 1.0; 1161 } 1162 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1163 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1164 1165 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1166 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1167 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1168 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1170 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1171 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1172 } 1173 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1174 } 1175 } 1176 /* free memory */ 1177 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1178 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1179 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1180 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1181 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1182 ierr = PetscFree(work);CHKERRQ(ierr); 1183 if (n_vertices) { 1184 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1185 } 1186 if (n_constraints) { 1187 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1188 } 1189 /* Checking coarse_sub_mat and coarse basis functios */ 1190 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1191 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1192 //if (pcbddc->dbg_flag) { 1193 if (0) { 1194 Mat coarse_sub_mat; 1195 Mat AUXMAT,TM1,TM2,TM3,TM4; 1196 Mat coarse_phi_D,coarse_phi_B; 1197 Mat coarse_psi_D,coarse_psi_B; 1198 Mat A_II,A_BB,A_IB,A_BI; 1199 Mat C_B,CPHI; 1200 IS is_dummy; 1201 Vec mones; 1202 MatType checkmattype=MATSEQAIJ; 1203 PetscReal real_value; 1204 1205 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1206 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1207 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1208 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1209 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1210 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1211 if (unsymmetric_check) { 1212 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1213 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1214 } 1215 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1216 1217 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1218 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1219 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1220 if (unsymmetric_check) { 1221 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1222 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1223 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1224 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1225 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1226 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1227 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1228 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1229 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1230 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1231 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1232 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1233 } else { 1234 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1235 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1236 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1237 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1238 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1239 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1240 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1241 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1242 } 1243 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1244 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1245 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1246 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1247 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1248 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1249 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1250 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1251 1252 /* check constraints */ 1253 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1254 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1255 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1256 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1257 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1258 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1259 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1261 if (unsymmetric_check) { 1262 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1263 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1264 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1265 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1267 } 1268 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1269 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1270 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1271 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1272 1273 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1274 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1275 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1276 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1277 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1278 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1279 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1280 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1281 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1282 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1283 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1284 if (unsymmetric_check) { 1285 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1286 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1287 } 1288 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1289 } 1290 /* get back data */ 1291 *coarse_submat_vals_n = coarse_submat_vals; 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #undef __FUNCT__ 1296 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1297 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1298 { 1299 Mat *work_mat; 1300 IS isrow_s,iscol_s; 1301 PetscBool rsorted,csorted; 1302 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1303 PetscErrorCode ierr; 1304 1305 PetscFunctionBegin; 1306 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1307 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1308 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1309 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1310 1311 if (!rsorted) { 1312 const PetscInt *idxs; 1313 PetscInt *idxs_sorted,i; 1314 1315 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1316 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1317 for (i=0;i<rsize;i++) { 1318 idxs_perm_r[i] = i; 1319 } 1320 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1321 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1322 for (i=0;i<rsize;i++) { 1323 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1324 } 1325 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1326 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1327 } else { 1328 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1329 isrow_s = isrow; 1330 } 1331 1332 if (!csorted) { 1333 if (isrow == iscol) { 1334 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1335 iscol_s = isrow_s; 1336 } else { 1337 const PetscInt *idxs; 1338 PetscInt *idxs_sorted,i; 1339 1340 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1341 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1342 for (i=0;i<csize;i++) { 1343 idxs_perm_c[i] = i; 1344 } 1345 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1346 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1347 for (i=0;i<csize;i++) { 1348 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1349 } 1350 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1351 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1352 } 1353 } else { 1354 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1355 iscol_s = iscol; 1356 } 1357 1358 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1359 1360 if (!rsorted || !csorted) { 1361 Mat new_mat; 1362 IS is_perm_r,is_perm_c; 1363 1364 if (!rsorted) { 1365 PetscInt *idxs_r,i; 1366 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1367 for (i=0;i<rsize;i++) { 1368 idxs_r[idxs_perm_r[i]] = i; 1369 } 1370 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1371 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1372 } else { 1373 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1374 } 1375 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1376 1377 if (!csorted) { 1378 if (isrow_s == iscol_s) { 1379 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1380 is_perm_c = is_perm_r; 1381 } else { 1382 PetscInt *idxs_c,i; 1383 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1384 for (i=0;i<csize;i++) { 1385 idxs_c[idxs_perm_c[i]] = i; 1386 } 1387 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1388 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1389 } 1390 } else { 1391 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1392 } 1393 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1394 1395 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1396 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1397 work_mat[0] = new_mat; 1398 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1399 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1400 } 1401 1402 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1403 *B = work_mat[0]; 1404 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1405 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1406 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1412 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1413 { 1414 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1415 PC_IS* pcis = (PC_IS*)pc->data; 1416 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1417 Mat new_mat; 1418 IS is_local,is_global; 1419 PetscInt local_size; 1420 PetscBool isseqaij; 1421 PetscErrorCode ierr; 1422 1423 PetscFunctionBegin; 1424 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1425 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1426 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1427 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1428 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1429 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1430 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1431 1432 /* check */ 1433 if (pcbddc->dbg_flag) { 1434 Vec x,x_change; 1435 PetscReal error; 1436 1437 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1438 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1439 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1440 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1441 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1442 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1443 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1444 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1445 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1446 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1447 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1448 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1449 ierr = VecDestroy(&x);CHKERRQ(ierr); 1450 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1451 } 1452 1453 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1454 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1455 if (isseqaij) { 1456 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1457 } else { 1458 Mat work_mat; 1459 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1460 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1461 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1462 } 1463 if (matis->A->symmetric_set) { 1464 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1465 #if !defined(PETSC_USE_COMPLEX) 1466 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1467 #endif 1468 } 1469 /* 1470 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1471 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1472 */ 1473 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1479 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1480 { 1481 PC_IS* pcis = (PC_IS*)(pc->data); 1482 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1483 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1484 PetscInt *idx_R_local=NULL; 1485 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1486 PetscInt vbs,bs; 1487 PetscBT bitmask=NULL; 1488 PetscErrorCode ierr; 1489 1490 PetscFunctionBegin; 1491 /* 1492 No need to setup local scatters if 1493 - primal space is unchanged 1494 AND 1495 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1496 AND 1497 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1498 */ 1499 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1500 PetscFunctionReturn(0); 1501 } 1502 /* destroy old objects */ 1503 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1504 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1505 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1506 /* Set Non-overlapping dimensions */ 1507 n_B = pcis->n_B; 1508 n_D = pcis->n - n_B; 1509 n_vertices = pcbddc->n_vertices; 1510 1511 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1512 1513 /* create auxiliary bitmask and allocate workspace */ 1514 if (!sub_schurs->reuse_mumps) { 1515 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1516 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1517 for (i=0;i<n_vertices;i++) { 1518 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1519 } 1520 1521 for (i=0, n_R=0; i<pcis->n; i++) { 1522 if (!PetscBTLookup(bitmask,i)) { 1523 idx_R_local[n_R++] = i; 1524 } 1525 } 1526 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1527 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1528 1529 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1530 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1531 } 1532 1533 /* Block code */ 1534 vbs = 1; 1535 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1536 if (bs>1 && !(n_vertices%bs)) { 1537 PetscBool is_blocked = PETSC_TRUE; 1538 PetscInt *vary; 1539 if (!sub_schurs->reuse_mumps) { 1540 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1541 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1542 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1543 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1544 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1545 for (i=0; i<pcis->n/bs; i++) { 1546 if (vary[i]!=0 && vary[i]!=bs) { 1547 is_blocked = PETSC_FALSE; 1548 break; 1549 } 1550 } 1551 ierr = PetscFree(vary);CHKERRQ(ierr); 1552 } else { 1553 /* Verify directly the R set */ 1554 for (i=0; i<n_R/bs; i++) { 1555 PetscInt j,node=idx_R_local[bs*i]; 1556 for (j=1; j<bs; j++) { 1557 if (node != idx_R_local[bs*i+j]-j) { 1558 is_blocked = PETSC_FALSE; 1559 break; 1560 } 1561 } 1562 } 1563 } 1564 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1565 vbs = bs; 1566 for (i=0;i<n_R/vbs;i++) { 1567 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1568 } 1569 } 1570 } 1571 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1572 if (sub_schurs->reuse_mumps) { 1573 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1574 1575 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1576 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1577 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1578 reuse_mumps->is_R = pcbddc->is_R_local; 1579 } else { 1580 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1581 } 1582 1583 /* print some info if requested */ 1584 if (pcbddc->dbg_flag) { 1585 PetscInt benign; 1586 1587 benign = (PetscInt)(!!pcbddc->zerodiag); 1588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1589 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1590 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1591 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1592 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1593 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1594 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1595 } 1596 1597 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1598 if (!sub_schurs->reuse_mumps) { 1599 IS is_aux1,is_aux2; 1600 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1601 1602 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1603 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1604 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1605 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1606 for (i=0; i<n_D; i++) { 1607 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1608 } 1609 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1610 for (i=0, j=0; i<n_R; i++) { 1611 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1612 aux_array1[j++] = i; 1613 } 1614 } 1615 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1616 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1617 for (i=0, j=0; i<n_B; i++) { 1618 if (!PetscBTLookup(bitmask,is_indices[i])) { 1619 aux_array2[j++] = i; 1620 } 1621 } 1622 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1623 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1624 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1625 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1626 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1627 1628 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1629 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1630 for (i=0, j=0; i<n_R; i++) { 1631 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1632 aux_array1[j++] = i; 1633 } 1634 } 1635 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1636 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1637 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1638 } 1639 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1640 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1641 } else { 1642 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1643 IS tis; 1644 PetscInt schur_size; 1645 1646 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1647 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1648 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1649 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1650 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1651 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1652 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1653 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1654 } 1655 } 1656 PetscFunctionReturn(0); 1657 } 1658 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1662 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1663 { 1664 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1665 PC_IS *pcis = (PC_IS*)pc->data; 1666 PC pc_temp; 1667 Mat A_RR; 1668 MatReuse reuse; 1669 PetscScalar m_one = -1.0; 1670 PetscReal value; 1671 PetscInt n_D,n_R; 1672 PetscBool use_exact,use_exact_reduced,issbaij; 1673 PetscErrorCode ierr; 1674 /* prefixes stuff */ 1675 char dir_prefix[256],neu_prefix[256],str_level[16]; 1676 size_t len; 1677 1678 PetscFunctionBegin; 1679 1680 /* compute prefixes */ 1681 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1682 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1683 if (!pcbddc->current_level) { 1684 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1685 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1686 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1687 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1688 } else { 1689 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1690 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1691 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1692 len -= 15; /* remove "pc_bddc_coarse_" */ 1693 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1694 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1695 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1696 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1697 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1698 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1699 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1700 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1701 } 1702 1703 /* DIRICHLET PROBLEM */ 1704 if (dirichlet) { 1705 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1706 if (pcbddc->local_mat->symmetric_set) { 1707 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1708 } 1709 /* Matrix for Dirichlet problem is pcis->A_II */ 1710 n_D = pcis->n - pcis->n_B; 1711 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1712 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1713 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1714 /* default */ 1715 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1716 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1717 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1718 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1719 if (issbaij) { 1720 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1721 } else { 1722 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1723 } 1724 /* Allow user's customization */ 1725 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1726 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1727 } 1728 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1729 if (sub_schurs->reuse_mumps) { 1730 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1731 1732 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1733 } 1734 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1735 if (!n_D) { 1736 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1737 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1738 } 1739 /* Set Up KSP for Dirichlet problem of BDDC */ 1740 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1741 /* set ksp_D into pcis data */ 1742 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1743 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1744 pcis->ksp_D = pcbddc->ksp_D; 1745 } 1746 1747 /* NEUMANN PROBLEM */ 1748 A_RR = 0; 1749 if (neumann) { 1750 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1751 PetscInt ibs,mbs; 1752 PetscBool issbaij; 1753 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1754 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1755 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1756 if (pcbddc->ksp_R) { /* already created ksp */ 1757 PetscInt nn_R; 1758 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1759 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1760 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1761 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1762 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1763 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1764 reuse = MAT_INITIAL_MATRIX; 1765 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1766 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1767 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1768 reuse = MAT_INITIAL_MATRIX; 1769 } else { /* safe to reuse the matrix */ 1770 reuse = MAT_REUSE_MATRIX; 1771 } 1772 } 1773 /* last check */ 1774 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1775 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1776 reuse = MAT_INITIAL_MATRIX; 1777 } 1778 } else { /* first time, so we need to create the matrix */ 1779 reuse = MAT_INITIAL_MATRIX; 1780 } 1781 /* extract A_RR */ 1782 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1783 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1784 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1785 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1786 if (matis->A == pcbddc->local_mat) { 1787 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1788 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1789 } else { 1790 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1791 } 1792 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1793 if (matis->A == pcbddc->local_mat) { 1794 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1795 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1796 } else { 1797 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1798 } 1799 } 1800 if (!sub_schurs->reuse_mumps) { 1801 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1802 if (pcbddc->local_mat->symmetric_set) { 1803 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1804 } 1805 } else { 1806 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1807 1808 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1809 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1810 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1811 } 1812 if (!pcbddc->ksp_R) { /* create object if not present */ 1813 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1814 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1815 /* default */ 1816 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1817 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1818 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1819 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1820 if (issbaij) { 1821 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1822 } else { 1823 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1824 } 1825 /* Allow user's customization */ 1826 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1827 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1828 } 1829 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1830 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1831 if (!n_R) { 1832 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1833 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1834 } 1835 /* Reuse MUMPS solver if it is present */ 1836 if (sub_schurs->reuse_mumps) { 1837 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1838 1839 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1840 } 1841 /* Set Up KSP for Neumann problem of BDDC */ 1842 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1843 } 1844 /* free Neumann problem's matrix */ 1845 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1846 1847 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1848 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1849 if (pcbddc->dbg_flag) { 1850 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1851 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1852 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1853 } 1854 if (dirichlet) { /* Dirichlet */ 1855 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1856 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1857 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1858 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1859 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1860 /* need to be adapted? */ 1861 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1862 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1863 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1864 /* print info */ 1865 if (pcbddc->dbg_flag) { 1866 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1867 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1868 } 1869 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1870 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1871 } 1872 } 1873 if (neumann) { /* Neumann */ 1874 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1875 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1876 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1877 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1878 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1879 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1880 /* need to be adapted? */ 1881 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1882 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1883 /* print info */ 1884 if (pcbddc->dbg_flag) { 1885 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1886 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1887 } 1888 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1889 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1890 } 1891 } 1892 } 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1898 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1899 { 1900 PetscErrorCode ierr; 1901 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1902 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1903 1904 PetscFunctionBegin; 1905 if (!sub_schurs->reuse_mumps) { 1906 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1907 } 1908 if (!pcbddc->switch_static) { 1909 if (applytranspose && pcbddc->local_auxmat1) { 1910 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1911 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1912 } 1913 if (!sub_schurs->reuse_mumps) { 1914 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1915 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1916 } else { 1917 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1918 1919 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1920 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1921 } 1922 } else { 1923 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1924 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1925 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1926 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1927 if (applytranspose && pcbddc->local_auxmat1) { 1928 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1929 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1930 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1931 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1932 } 1933 } 1934 if (!sub_schurs->reuse_mumps) { 1935 if (applytranspose) { 1936 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1937 } else { 1938 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1939 } 1940 #if defined(PETSC_HAVE_MUMPS) 1941 } else { 1942 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1943 1944 if (applytranspose) { 1945 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1946 } else { 1947 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1948 } 1949 #endif 1950 } 1951 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1952 if (!pcbddc->switch_static) { 1953 if (!sub_schurs->reuse_mumps) { 1954 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1955 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1956 } else { 1957 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1958 1959 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1960 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1961 } 1962 if (!applytranspose && pcbddc->local_auxmat1) { 1963 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1964 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1965 } 1966 } else { 1967 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1968 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1969 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1970 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1971 if (!applytranspose && pcbddc->local_auxmat1) { 1972 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1973 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1974 } 1975 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1976 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1977 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1978 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1979 } 1980 PetscFunctionReturn(0); 1981 } 1982 1983 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1984 #undef __FUNCT__ 1985 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1986 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1987 { 1988 PetscErrorCode ierr; 1989 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1990 PC_IS* pcis = (PC_IS*) (pc->data); 1991 const PetscScalar zero = 0.0; 1992 1993 PetscFunctionBegin; 1994 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1995 if (applytranspose) { 1996 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1997 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1998 } else { 1999 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2000 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2001 } 2002 2003 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2004 if (pcbddc->benign_change) { 2005 PetscScalar *array; 2006 2007 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2008 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2009 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2010 } 2011 2012 /* start communications from local primal nodes to rhs of coarse solver */ 2013 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2014 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2015 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2016 2017 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2018 /* TODO remove null space when doing multilevel */ 2019 if (pcbddc->coarse_ksp) { 2020 Vec rhs,sol; 2021 2022 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2023 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2024 if (applytranspose) { 2025 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2026 } else { 2027 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2028 } 2029 } 2030 2031 /* Local solution on R nodes */ 2032 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2033 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2034 } 2035 2036 /* communications from coarse sol to local primal nodes */ 2037 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2038 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2039 2040 /* Sum contributions from two levels */ 2041 if (applytranspose) { 2042 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2043 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2044 } else { 2045 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2046 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2047 } 2048 /* store p0 */ 2049 if (pcbddc->benign_change) { 2050 PetscScalar *array; 2051 2052 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2053 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2054 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2055 } 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2061 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2062 { 2063 PetscErrorCode ierr; 2064 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2065 PetscScalar *array; 2066 Vec from,to; 2067 2068 PetscFunctionBegin; 2069 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2070 from = pcbddc->coarse_vec; 2071 to = pcbddc->vec1_P; 2072 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2073 Vec tvec; 2074 2075 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2076 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2077 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2078 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2079 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2080 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2081 } 2082 } else { /* from local to global -> put data in coarse right hand side */ 2083 from = pcbddc->vec1_P; 2084 to = pcbddc->coarse_vec; 2085 } 2086 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2087 PetscFunctionReturn(0); 2088 } 2089 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2092 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2093 { 2094 PetscErrorCode ierr; 2095 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2096 PetscScalar *array; 2097 Vec from,to; 2098 2099 PetscFunctionBegin; 2100 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2101 from = pcbddc->coarse_vec; 2102 to = pcbddc->vec1_P; 2103 } else { /* from local to global -> put data in coarse right hand side */ 2104 from = pcbddc->vec1_P; 2105 to = pcbddc->coarse_vec; 2106 } 2107 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2108 if (smode == SCATTER_FORWARD) { 2109 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2110 Vec tvec; 2111 2112 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2113 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2114 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2115 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2116 } 2117 } else { 2118 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2119 ierr = VecResetArray(from);CHKERRQ(ierr); 2120 } 2121 } 2122 PetscFunctionReturn(0); 2123 } 2124 2125 /* uncomment for testing purposes */ 2126 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2127 #undef __FUNCT__ 2128 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2129 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2130 { 2131 PetscErrorCode ierr; 2132 PC_IS* pcis = (PC_IS*)(pc->data); 2133 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2134 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2135 /* one and zero */ 2136 PetscScalar one=1.0,zero=0.0; 2137 /* space to store constraints and their local indices */ 2138 PetscScalar *constraints_data; 2139 PetscInt *constraints_idxs,*constraints_idxs_B; 2140 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2141 PetscInt *constraints_n; 2142 /* iterators */ 2143 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2144 /* BLAS integers */ 2145 PetscBLASInt lwork,lierr; 2146 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2147 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2148 /* reuse */ 2149 PetscInt olocal_primal_size,olocal_primal_size_cc; 2150 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2151 /* change of basis */ 2152 PetscBool qr_needed; 2153 PetscBT change_basis,qr_needed_idx; 2154 /* auxiliary stuff */ 2155 PetscInt *nnz,*is_indices; 2156 PetscInt ncc; 2157 /* some quantities */ 2158 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2159 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2160 2161 PetscFunctionBegin; 2162 /* Destroy Mat objects computed previously */ 2163 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2164 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2165 /* save info on constraints from previous setup (if any) */ 2166 olocal_primal_size = pcbddc->local_primal_size; 2167 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2168 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2169 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2170 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2171 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2172 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2173 2174 /* print some info */ 2175 if (pcbddc->dbg_flag) { 2176 IS vertices; 2177 PetscInt nv,nedges,nfaces; 2178 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2179 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2180 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2181 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2182 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2183 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2184 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2186 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2187 } 2188 2189 if (!pcbddc->adaptive_selection) { 2190 IS ISForVertices,*ISForFaces,*ISForEdges; 2191 MatNullSpace nearnullsp; 2192 const Vec *nearnullvecs; 2193 Vec *localnearnullsp; 2194 PetscScalar *array; 2195 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2196 PetscBool nnsp_has_cnst; 2197 /* LAPACK working arrays for SVD or POD */ 2198 PetscBool skip_lapack,boolforchange; 2199 PetscScalar *work; 2200 PetscReal *singular_vals; 2201 #if defined(PETSC_USE_COMPLEX) 2202 PetscReal *rwork; 2203 #endif 2204 #if defined(PETSC_MISSING_LAPACK_GESVD) 2205 PetscScalar *temp_basis,*correlation_mat; 2206 #else 2207 PetscBLASInt dummy_int=1; 2208 PetscScalar dummy_scalar=1.; 2209 #endif 2210 2211 /* Get index sets for faces, edges and vertices from graph */ 2212 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2213 /* free unneeded index sets */ 2214 if (!pcbddc->use_vertices) { 2215 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2216 } 2217 if (!pcbddc->use_edges) { 2218 for (i=0;i<n_ISForEdges;i++) { 2219 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2220 } 2221 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2222 n_ISForEdges = 0; 2223 } 2224 if (!pcbddc->use_faces) { 2225 for (i=0;i<n_ISForFaces;i++) { 2226 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2227 } 2228 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2229 n_ISForFaces = 0; 2230 } 2231 2232 #if defined(PETSC_USE_DEBUG) 2233 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2234 Also use_change_of_basis should be consistent among processors */ 2235 if (pcbddc->NullSpace) { 2236 PetscBool tbool[2],gbool[2]; 2237 2238 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2239 pcbddc->use_change_of_basis = PETSC_TRUE; 2240 if (!ISForEdges) { 2241 pcbddc->use_change_on_faces = PETSC_TRUE; 2242 } 2243 } 2244 tbool[0] = pcbddc->use_change_of_basis; 2245 tbool[1] = pcbddc->use_change_on_faces; 2246 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2247 pcbddc->use_change_of_basis = gbool[0]; 2248 pcbddc->use_change_on_faces = gbool[1]; 2249 } 2250 #endif 2251 2252 /* check if near null space is attached to global mat */ 2253 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2254 if (nearnullsp) { 2255 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2256 /* remove any stored info */ 2257 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2258 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2259 /* store information for BDDC solver reuse */ 2260 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2261 pcbddc->onearnullspace = nearnullsp; 2262 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2263 for (i=0;i<nnsp_size;i++) { 2264 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2265 } 2266 } else { /* if near null space is not provided BDDC uses constants by default */ 2267 nnsp_size = 0; 2268 nnsp_has_cnst = PETSC_TRUE; 2269 } 2270 /* get max number of constraints on a single cc */ 2271 max_constraints = nnsp_size; 2272 if (nnsp_has_cnst) max_constraints++; 2273 2274 /* 2275 Evaluate maximum storage size needed by the procedure 2276 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2277 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2278 There can be multiple constraints per connected component 2279 */ 2280 n_vertices = 0; 2281 if (ISForVertices) { 2282 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2283 } 2284 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2285 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2286 2287 total_counts = n_ISForFaces+n_ISForEdges; 2288 total_counts *= max_constraints; 2289 total_counts += n_vertices; 2290 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2291 2292 total_counts = 0; 2293 max_size_of_constraint = 0; 2294 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2295 IS used_is; 2296 if (i<n_ISForEdges) { 2297 used_is = ISForEdges[i]; 2298 } else { 2299 used_is = ISForFaces[i-n_ISForEdges]; 2300 } 2301 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2302 total_counts += j; 2303 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2304 } 2305 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2306 2307 /* get local part of global near null space vectors */ 2308 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2309 for (k=0;k<nnsp_size;k++) { 2310 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2311 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2312 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2313 } 2314 2315 /* whether or not to skip lapack calls */ 2316 skip_lapack = PETSC_TRUE; 2317 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2318 2319 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2320 if (!skip_lapack) { 2321 PetscScalar temp_work; 2322 2323 #if defined(PETSC_MISSING_LAPACK_GESVD) 2324 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2325 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2326 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2327 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2328 #if defined(PETSC_USE_COMPLEX) 2329 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2330 #endif 2331 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2332 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2333 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2334 lwork = -1; 2335 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2336 #if !defined(PETSC_USE_COMPLEX) 2337 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2338 #else 2339 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2340 #endif 2341 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2342 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2343 #else /* on missing GESVD */ 2344 /* SVD */ 2345 PetscInt max_n,min_n; 2346 max_n = max_size_of_constraint; 2347 min_n = max_constraints; 2348 if (max_size_of_constraint < max_constraints) { 2349 min_n = max_size_of_constraint; 2350 max_n = max_constraints; 2351 } 2352 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2353 #if defined(PETSC_USE_COMPLEX) 2354 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2355 #endif 2356 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2357 lwork = -1; 2358 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2359 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2360 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2361 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2362 #if !defined(PETSC_USE_COMPLEX) 2363 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2364 #else 2365 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2366 #endif 2367 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2368 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2369 #endif /* on missing GESVD */ 2370 /* Allocate optimal workspace */ 2371 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2372 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2373 } 2374 /* Now we can loop on constraining sets */ 2375 total_counts = 0; 2376 constraints_idxs_ptr[0] = 0; 2377 constraints_data_ptr[0] = 0; 2378 /* vertices */ 2379 if (n_vertices) { 2380 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2381 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2382 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2383 for (i=0;i<n_vertices;i++) { 2384 constraints_n[total_counts] = 1; 2385 constraints_data[total_counts] = 1.0; 2386 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2387 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2388 total_counts++; 2389 } 2390 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2391 PetscBool used_vertex; 2392 for (i=0;i<n_vertices;i++) { 2393 used_vertex = PETSC_FALSE; 2394 k = 0; 2395 while (!used_vertex && k<nnsp_size) { 2396 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2397 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2398 constraints_n[total_counts] = 1; 2399 constraints_idxs[total_counts] = is_indices[i]; 2400 constraints_data[total_counts] = 1.0; 2401 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2402 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2403 total_counts++; 2404 used_vertex = PETSC_TRUE; 2405 } 2406 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2407 k++; 2408 } 2409 } 2410 } 2411 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2412 n_vertices = total_counts; 2413 } 2414 2415 /* edges and faces */ 2416 total_counts_cc = total_counts; 2417 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2418 IS used_is; 2419 PetscBool idxs_copied = PETSC_FALSE; 2420 2421 if (ncc<n_ISForEdges) { 2422 used_is = ISForEdges[ncc]; 2423 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2424 } else { 2425 used_is = ISForFaces[ncc-n_ISForEdges]; 2426 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2427 } 2428 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2429 2430 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2431 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2432 /* change of basis should not be performed on local periodic nodes */ 2433 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2434 if (nnsp_has_cnst) { 2435 PetscScalar quad_value; 2436 2437 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2438 idxs_copied = PETSC_TRUE; 2439 2440 if (!pcbddc->use_nnsp_true) { 2441 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2442 } else { 2443 quad_value = 1.0; 2444 } 2445 for (j=0;j<size_of_constraint;j++) { 2446 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2447 } 2448 temp_constraints++; 2449 total_counts++; 2450 } 2451 for (k=0;k<nnsp_size;k++) { 2452 PetscReal real_value; 2453 PetscScalar *ptr_to_data; 2454 2455 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2456 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2457 for (j=0;j<size_of_constraint;j++) { 2458 ptr_to_data[j] = array[is_indices[j]]; 2459 } 2460 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2461 /* check if array is null on the connected component */ 2462 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2463 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2464 if (real_value > 0.0) { /* keep indices and values */ 2465 temp_constraints++; 2466 total_counts++; 2467 if (!idxs_copied) { 2468 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2469 idxs_copied = PETSC_TRUE; 2470 } 2471 } 2472 } 2473 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2474 valid_constraints = temp_constraints; 2475 if (!pcbddc->use_nnsp_true && temp_constraints) { 2476 if (temp_constraints == 1) { /* just normalize the constraint */ 2477 PetscScalar norm,*ptr_to_data; 2478 2479 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2480 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2481 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2482 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2483 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2484 } else { /* perform SVD */ 2485 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2486 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2487 2488 #if defined(PETSC_MISSING_LAPACK_GESVD) 2489 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2490 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2491 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2492 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2493 from that computed using LAPACKgesvd 2494 -> This is due to a different computation of eigenvectors in LAPACKheev 2495 -> The quality of the POD-computed basis will be the same */ 2496 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2497 /* Store upper triangular part of correlation matrix */ 2498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2499 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2500 for (j=0;j<temp_constraints;j++) { 2501 for (k=0;k<j+1;k++) { 2502 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2503 } 2504 } 2505 /* compute eigenvalues and eigenvectors of correlation matrix */ 2506 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2507 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2508 #if !defined(PETSC_USE_COMPLEX) 2509 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2510 #else 2511 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2512 #endif 2513 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2514 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2515 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2516 j = 0; 2517 while (j < temp_constraints && singular_vals[j] < tol) j++; 2518 total_counts = total_counts-j; 2519 valid_constraints = temp_constraints-j; 2520 /* scale and copy POD basis into used quadrature memory */ 2521 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2522 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2523 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2524 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2525 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2526 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2527 if (j<temp_constraints) { 2528 PetscInt ii; 2529 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2530 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2531 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2532 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2533 for (k=0;k<temp_constraints-j;k++) { 2534 for (ii=0;ii<size_of_constraint;ii++) { 2535 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2536 } 2537 } 2538 } 2539 #else /* on missing GESVD */ 2540 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2541 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2542 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2543 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2544 #if !defined(PETSC_USE_COMPLEX) 2545 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2546 #else 2547 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2548 #endif 2549 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2550 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2551 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2552 k = temp_constraints; 2553 if (k > size_of_constraint) k = size_of_constraint; 2554 j = 0; 2555 while (j < k && singular_vals[k-j-1] < tol) j++; 2556 valid_constraints = k-j; 2557 total_counts = total_counts-temp_constraints+valid_constraints; 2558 #endif /* on missing GESVD */ 2559 } 2560 } 2561 /* update pointers information */ 2562 if (valid_constraints) { 2563 constraints_n[total_counts_cc] = valid_constraints; 2564 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2565 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2566 /* set change_of_basis flag */ 2567 if (boolforchange) { 2568 PetscBTSet(change_basis,total_counts_cc); 2569 } 2570 total_counts_cc++; 2571 } 2572 } 2573 /* free workspace */ 2574 if (!skip_lapack) { 2575 ierr = PetscFree(work);CHKERRQ(ierr); 2576 #if defined(PETSC_USE_COMPLEX) 2577 ierr = PetscFree(rwork);CHKERRQ(ierr); 2578 #endif 2579 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2580 #if defined(PETSC_MISSING_LAPACK_GESVD) 2581 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2582 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2583 #endif 2584 } 2585 for (k=0;k<nnsp_size;k++) { 2586 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2587 } 2588 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2589 /* free index sets of faces, edges and vertices */ 2590 for (i=0;i<n_ISForFaces;i++) { 2591 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2592 } 2593 if (n_ISForFaces) { 2594 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2595 } 2596 for (i=0;i<n_ISForEdges;i++) { 2597 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2598 } 2599 if (n_ISForEdges) { 2600 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2601 } 2602 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2603 } else { 2604 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2605 2606 total_counts = 0; 2607 n_vertices = 0; 2608 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2609 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2610 } 2611 max_constraints = 0; 2612 total_counts_cc = 0; 2613 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2614 total_counts += pcbddc->adaptive_constraints_n[i]; 2615 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2616 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2617 } 2618 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2619 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2620 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2621 constraints_data = pcbddc->adaptive_constraints_data; 2622 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2623 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2624 total_counts_cc = 0; 2625 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2626 if (pcbddc->adaptive_constraints_n[i]) { 2627 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2628 } 2629 } 2630 #if 0 2631 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2632 for (i=0;i<total_counts_cc;i++) { 2633 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2634 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2635 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2636 printf(" %d",constraints_idxs[j]); 2637 } 2638 printf("\n"); 2639 printf("number of cc: %d\n",constraints_n[i]); 2640 } 2641 for (i=0;i<n_vertices;i++) { 2642 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2643 } 2644 for (i=0;i<sub_schurs->n_subs;i++) { 2645 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2646 } 2647 #endif 2648 2649 max_size_of_constraint = 0; 2650 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2651 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2652 /* Change of basis */ 2653 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2654 if (pcbddc->use_change_of_basis) { 2655 for (i=0;i<sub_schurs->n_subs;i++) { 2656 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2657 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2658 } 2659 } 2660 } 2661 } 2662 pcbddc->local_primal_size = total_counts; 2663 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2664 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2665 2666 /* map constraints_idxs in boundary numbering */ 2667 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2668 if (i != constraints_idxs_ptr[total_counts_cc]) { 2669 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2670 } 2671 2672 /* Create constraint matrix */ 2673 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2674 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2675 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2676 2677 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2678 /* determine if a QR strategy is needed for change of basis */ 2679 qr_needed = PETSC_FALSE; 2680 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2681 total_primal_vertices=0; 2682 pcbddc->local_primal_size_cc = 0; 2683 for (i=0;i<total_counts_cc;i++) { 2684 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2685 if (size_of_constraint == 1) { 2686 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2687 pcbddc->local_primal_size_cc += 1; 2688 } else if (PetscBTLookup(change_basis,i)) { 2689 for (k=0;k<constraints_n[i];k++) { 2690 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2691 } 2692 pcbddc->local_primal_size_cc += constraints_n[i]; 2693 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2694 PetscBTSet(qr_needed_idx,i); 2695 qr_needed = PETSC_TRUE; 2696 } 2697 } else { 2698 pcbddc->local_primal_size_cc += 1; 2699 } 2700 } 2701 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2702 pcbddc->n_vertices = total_primal_vertices; 2703 /* permute indices in order to have a sorted set of vertices */ 2704 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2705 2706 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2707 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2708 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2709 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2710 2711 /* nonzero structure of constraint matrix */ 2712 /* and get reference dof for local constraints */ 2713 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2714 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2715 2716 j = total_primal_vertices; 2717 total_counts = total_primal_vertices; 2718 cum = total_primal_vertices; 2719 for (i=n_vertices;i<total_counts_cc;i++) { 2720 if (!PetscBTLookup(change_basis,i)) { 2721 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2722 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2723 cum++; 2724 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2725 for (k=0;k<constraints_n[i];k++) { 2726 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2727 nnz[j+k] = size_of_constraint; 2728 } 2729 j += constraints_n[i]; 2730 } 2731 } 2732 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2733 ierr = PetscFree(nnz);CHKERRQ(ierr); 2734 2735 /* set values in constraint matrix */ 2736 for (i=0;i<total_primal_vertices;i++) { 2737 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2738 } 2739 total_counts = total_primal_vertices; 2740 for (i=n_vertices;i<total_counts_cc;i++) { 2741 if (!PetscBTLookup(change_basis,i)) { 2742 PetscInt *cols; 2743 2744 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2745 cols = constraints_idxs+constraints_idxs_ptr[i]; 2746 for (k=0;k<constraints_n[i];k++) { 2747 PetscInt row = total_counts+k; 2748 PetscScalar *vals; 2749 2750 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2751 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2752 } 2753 total_counts += constraints_n[i]; 2754 } 2755 } 2756 /* assembling */ 2757 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2758 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 2760 /* 2761 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2762 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2763 */ 2764 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2765 if (pcbddc->use_change_of_basis) { 2766 /* dual and primal dofs on a single cc */ 2767 PetscInt dual_dofs,primal_dofs; 2768 /* working stuff for GEQRF */ 2769 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2770 PetscBLASInt lqr_work; 2771 /* working stuff for UNGQR */ 2772 PetscScalar *gqr_work,lgqr_work_t; 2773 PetscBLASInt lgqr_work; 2774 /* working stuff for TRTRS */ 2775 PetscScalar *trs_rhs; 2776 PetscBLASInt Blas_NRHS; 2777 /* pointers for values insertion into change of basis matrix */ 2778 PetscInt *start_rows,*start_cols; 2779 PetscScalar *start_vals; 2780 /* working stuff for values insertion */ 2781 PetscBT is_primal; 2782 PetscInt *aux_primal_numbering_B; 2783 /* matrix sizes */ 2784 PetscInt global_size,local_size; 2785 /* temporary change of basis */ 2786 Mat localChangeOfBasisMatrix; 2787 /* extra space for debugging */ 2788 PetscScalar *dbg_work; 2789 2790 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2791 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2792 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2793 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2794 /* nonzeros for local mat */ 2795 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2796 for (i=0;i<pcis->n;i++) nnz[i]=1; 2797 for (i=n_vertices;i<total_counts_cc;i++) { 2798 if (PetscBTLookup(change_basis,i)) { 2799 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2800 if (PetscBTLookup(qr_needed_idx,i)) { 2801 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2802 } else { 2803 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2804 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2805 } 2806 } 2807 } 2808 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2809 ierr = PetscFree(nnz);CHKERRQ(ierr); 2810 /* Set initial identity in the matrix */ 2811 for (i=0;i<pcis->n;i++) { 2812 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2813 } 2814 2815 if (pcbddc->dbg_flag) { 2816 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2817 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2818 } 2819 2820 2821 /* Now we loop on the constraints which need a change of basis */ 2822 /* 2823 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2824 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2825 2826 Basic blocks of change of basis matrix T computed by 2827 2828 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2829 2830 | 1 0 ... 0 s_1/S | 2831 | 0 1 ... 0 s_2/S | 2832 | ... | 2833 | 0 ... 1 s_{n-1}/S | 2834 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2835 2836 with S = \sum_{i=1}^n s_i^2 2837 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2838 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2839 2840 - QR decomposition of constraints otherwise 2841 */ 2842 if (qr_needed) { 2843 /* space to store Q */ 2844 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2845 /* first we issue queries for optimal work */ 2846 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2847 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2848 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2849 lqr_work = -1; 2850 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2851 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2852 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2853 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2854 lgqr_work = -1; 2855 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2856 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2857 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2858 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2859 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2860 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2861 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2862 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2863 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2864 /* array to store scaling factors for reflectors */ 2865 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2866 /* array to store rhs and solution of triangular solver */ 2867 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2868 /* allocating workspace for check */ 2869 if (pcbddc->dbg_flag) { 2870 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2871 } 2872 } 2873 /* array to store whether a node is primal or not */ 2874 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2875 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2876 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2877 if (i != total_primal_vertices) { 2878 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2879 } 2880 for (i=0;i<total_primal_vertices;i++) { 2881 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2882 } 2883 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2884 2885 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2886 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2887 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2888 if (PetscBTLookup(change_basis,total_counts)) { 2889 /* get constraint info */ 2890 primal_dofs = constraints_n[total_counts]; 2891 dual_dofs = size_of_constraint-primal_dofs; 2892 2893 if (pcbddc->dbg_flag) { 2894 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2895 } 2896 2897 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2898 2899 /* copy quadrature constraints for change of basis check */ 2900 if (pcbddc->dbg_flag) { 2901 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2902 } 2903 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2904 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2905 2906 /* compute QR decomposition of constraints */ 2907 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2908 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2909 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2910 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2911 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2912 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2913 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2914 2915 /* explictly compute R^-T */ 2916 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2917 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2918 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2919 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2920 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2921 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2922 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2923 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2924 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2925 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2926 2927 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2928 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2929 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2930 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2931 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2932 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2933 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2934 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2935 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2936 2937 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2938 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2939 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2940 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2941 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2942 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2943 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2944 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2946 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2947 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2948 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2949 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2950 2951 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2952 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2953 /* insert cols for primal dofs */ 2954 for (j=0;j<primal_dofs;j++) { 2955 start_vals = &qr_basis[j*size_of_constraint]; 2956 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2957 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2958 } 2959 /* insert cols for dual dofs */ 2960 for (j=0,k=0;j<dual_dofs;k++) { 2961 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2962 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2963 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2964 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2965 j++; 2966 } 2967 } 2968 2969 /* check change of basis */ 2970 if (pcbddc->dbg_flag) { 2971 PetscInt ii,jj; 2972 PetscBool valid_qr=PETSC_TRUE; 2973 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2975 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2977 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2978 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2979 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2980 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2981 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2982 for (jj=0;jj<size_of_constraint;jj++) { 2983 for (ii=0;ii<primal_dofs;ii++) { 2984 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2985 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2986 } 2987 } 2988 if (!valid_qr) { 2989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2990 for (jj=0;jj<size_of_constraint;jj++) { 2991 for (ii=0;ii<primal_dofs;ii++) { 2992 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2993 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2994 } 2995 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2996 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2997 } 2998 } 2999 } 3000 } else { 3001 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3002 } 3003 } 3004 } else { /* simple transformation block */ 3005 PetscInt row,col; 3006 PetscScalar val,norm; 3007 3008 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3009 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3010 for (j=0;j<size_of_constraint;j++) { 3011 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3012 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3013 if (!PetscBTLookup(is_primal,row_B)) { 3014 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3015 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3016 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3017 } else { 3018 for (k=0;k<size_of_constraint;k++) { 3019 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3020 if (row != col) { 3021 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3022 } else { 3023 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3024 } 3025 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3026 } 3027 } 3028 } 3029 if (pcbddc->dbg_flag) { 3030 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3031 } 3032 } 3033 } else { 3034 if (pcbddc->dbg_flag) { 3035 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3036 } 3037 } 3038 } 3039 3040 /* free workspace */ 3041 if (qr_needed) { 3042 if (pcbddc->dbg_flag) { 3043 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3044 } 3045 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3046 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3047 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3048 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3049 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3050 } 3051 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3052 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3053 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3054 3055 /* assembling of global change of variable */ 3056 { 3057 Mat tmat; 3058 PetscInt bs; 3059 3060 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3061 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3062 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3063 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3064 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3065 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3066 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3067 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3068 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3069 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3070 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3071 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3072 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3073 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3074 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3075 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3076 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3077 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3078 } 3079 /* check */ 3080 if (pcbddc->dbg_flag) { 3081 PetscReal error; 3082 Vec x,x_change; 3083 3084 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3085 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3086 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3087 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3088 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3089 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3090 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3091 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3092 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3093 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3094 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3095 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3096 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3097 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3098 ierr = VecDestroy(&x);CHKERRQ(ierr); 3099 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3100 } 3101 3102 /* adapt sub_schurs computed (if any) */ 3103 if (pcbddc->use_deluxe_scaling) { 3104 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3105 if (sub_schurs->S_Ej_all) { 3106 Mat S_new,tmat; 3107 IS is_all_N; 3108 3109 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3110 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3111 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3112 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3113 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3114 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3115 sub_schurs->S_Ej_all = S_new; 3116 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3117 if (sub_schurs->sum_S_Ej_all) { 3118 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3119 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3120 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3121 sub_schurs->sum_S_Ej_all = S_new; 3122 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3123 } 3124 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3125 } 3126 } 3127 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3128 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3129 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3130 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3131 } 3132 3133 /* set up change of basis context */ 3134 if (pcbddc->ChangeOfBasisMatrix) { 3135 PCBDDCChange_ctx change_ctx; 3136 3137 if (!pcbddc->new_global_mat) { 3138 PetscInt global_size,local_size; 3139 3140 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3141 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3142 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3143 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3144 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3145 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3146 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3147 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3148 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3149 } else { 3150 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3151 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3152 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3153 } 3154 if (!pcbddc->user_ChangeOfBasisMatrix) { 3155 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3156 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3157 } else { 3158 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3159 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3160 } 3161 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3162 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3163 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3164 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3165 } 3166 3167 /* add pressure dof to set of primal nodes for numbering purposes */ 3168 if (pcbddc->zerodiag) { 3169 const PetscInt *idxs; 3170 PetscInt nz; 3171 3172 ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr); 3173 ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr); 3174 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = idxs[nz-1]; 3175 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = idxs[nz-1]; 3176 ierr = ISRestoreIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr); 3177 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3178 pcbddc->local_primal_size_cc++; 3179 pcbddc->local_primal_size++; 3180 } 3181 3182 /* check if a new primal space has been introduced (also take into account benign trick) */ 3183 pcbddc->new_primal_space_local = PETSC_TRUE; 3184 if (olocal_primal_size == pcbddc->local_primal_size) { 3185 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3186 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3187 if (!pcbddc->new_primal_space_local) { 3188 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3189 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3190 } 3191 } 3192 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3193 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3194 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3195 3196 /* flush dbg viewer */ 3197 if (pcbddc->dbg_flag) { 3198 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3199 } 3200 3201 /* free workspace */ 3202 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3203 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3204 if (!pcbddc->adaptive_selection) { 3205 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3206 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3207 } else { 3208 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3209 pcbddc->adaptive_constraints_idxs_ptr, 3210 pcbddc->adaptive_constraints_data_ptr, 3211 pcbddc->adaptive_constraints_idxs, 3212 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3213 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3214 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3215 } 3216 PetscFunctionReturn(0); 3217 } 3218 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3221 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3222 { 3223 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3224 PC_IS *pcis = (PC_IS*)pc->data; 3225 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3226 PetscInt ierr,i,vertex_size,N; 3227 PetscViewer viewer=pcbddc->dbg_viewer; 3228 3229 PetscFunctionBegin; 3230 /* Reset previously computed graph */ 3231 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3232 /* Init local Graph struct */ 3233 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3234 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3235 3236 /* Check validity of the csr graph passed in by the user */ 3237 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3238 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3239 } 3240 3241 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3242 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3243 PetscInt *xadj,*adjncy; 3244 PetscInt nvtxs; 3245 PetscBool flg_row=PETSC_FALSE; 3246 3247 if (pcbddc->use_local_adj) { 3248 3249 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3250 if (flg_row) { 3251 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3252 pcbddc->computed_rowadj = PETSC_TRUE; 3253 } 3254 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3255 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3256 IS is_dummy; 3257 ISLocalToGlobalMapping l2gmap_dummy; 3258 PetscInt j,sum; 3259 PetscInt *cxadj,*cadjncy; 3260 const PetscInt *idxs; 3261 PCBDDCGraph graph; 3262 PetscBT is_on_boundary; 3263 3264 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3265 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3266 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3267 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3268 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3269 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3270 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3271 if (flg_row) { 3272 graph->xadj = xadj; 3273 graph->adjncy = adjncy; 3274 } 3275 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3276 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3277 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3278 3279 if (pcbddc->dbg_flag) { 3280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3281 for (i=0;i<graph->ncc;i++) { 3282 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3283 } 3284 } 3285 3286 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3287 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3288 for (i=0;i<pcis->n_B;i++) { 3289 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3290 } 3291 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3292 3293 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3294 sum = 0; 3295 for (i=0;i<graph->ncc;i++) { 3296 PetscInt sizecc = 0; 3297 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3298 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3299 sizecc++; 3300 } 3301 } 3302 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3303 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3304 cxadj[graph->queue[j]] = sizecc; 3305 } 3306 } 3307 sum += sizecc*sizecc; 3308 } 3309 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3310 sum = 0; 3311 for (i=0;i<pcis->n;i++) { 3312 PetscInt temp = cxadj[i]; 3313 cxadj[i] = sum; 3314 sum += temp; 3315 } 3316 cxadj[pcis->n] = sum; 3317 for (i=0;i<graph->ncc;i++) { 3318 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3319 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3320 PetscInt k,sizecc = 0; 3321 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3322 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3323 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3324 sizecc++; 3325 } 3326 } 3327 } 3328 } 3329 } 3330 if (sum) { 3331 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3332 } else { 3333 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3334 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3335 } 3336 graph->xadj = 0; 3337 graph->adjncy = 0; 3338 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3339 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3340 } 3341 } 3342 if (pcbddc->dbg_flag) { 3343 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3344 } 3345 3346 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3347 vertex_size = 1; 3348 if (pcbddc->user_provided_isfordofs) { 3349 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3350 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3351 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3352 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3353 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3354 } 3355 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3356 pcbddc->n_ISForDofs = 0; 3357 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3358 } 3359 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3360 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3361 } else { 3362 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3363 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3364 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3365 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3366 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3367 } 3368 } 3369 } 3370 3371 /* Setup of Graph */ 3372 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3373 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3374 } 3375 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3376 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3377 } 3378 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3379 3380 /* Graph's connected components analysis */ 3381 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3382 3383 /* print some info to stdout */ 3384 if (pcbddc->dbg_flag) { 3385 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3386 } 3387 3388 /* mark topography has done */ 3389 pcbddc->recompute_topography = PETSC_FALSE; 3390 PetscFunctionReturn(0); 3391 } 3392 3393 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3394 #undef __FUNCT__ 3395 #define __FUNCT__ "PCBDDCSubsetNumbering" 3396 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3397 { 3398 PetscSF sf; 3399 PetscLayout map; 3400 const PetscInt *idxs; 3401 PetscInt *leaf_data,*root_data,*gidxs; 3402 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3403 PetscInt n_n,nlocals,start,first_index; 3404 PetscMPIInt commsize; 3405 PetscBool first_found; 3406 PetscErrorCode ierr; 3407 3408 PetscFunctionBegin; 3409 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3410 if (subset_mult) { 3411 PetscCheckSameComm(subset,1,subset_mult,2); 3412 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3413 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3414 } 3415 /* create workspace layout for computing global indices of subset */ 3416 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3417 lbounds[0] = lbounds[1] = 0; 3418 for (i=0;i<n;i++) { 3419 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3420 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3421 } 3422 lbounds[0] = -lbounds[0]; 3423 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3424 gbounds[0] = -gbounds[0]; 3425 N = gbounds[1] - gbounds[0] + 1; 3426 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3427 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3428 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3429 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3430 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3431 3432 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3433 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3434 if (subset_mult) { 3435 const PetscInt* idxs_mult; 3436 3437 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3438 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3439 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3440 } else { 3441 for (i=0;i<n;i++) leaf_data[i] = 1; 3442 } 3443 /* local size of new subset */ 3444 n_n = 0; 3445 for (i=0;i<n;i++) n_n += leaf_data[i]; 3446 3447 /* global indexes in layout */ 3448 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3449 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3450 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3451 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3452 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3453 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3454 3455 /* reduce from leaves to roots */ 3456 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3457 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3458 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3459 3460 /* count indexes in local part of layout */ 3461 nlocals = 0; 3462 first_index = -1; 3463 first_found = PETSC_FALSE; 3464 for (i=0;i<Nl;i++) { 3465 if (!first_found && root_data[i]) { 3466 first_found = PETSC_TRUE; 3467 first_index = i; 3468 } 3469 nlocals += root_data[i]; 3470 } 3471 3472 /* cumulative of number of indexes and size of subset without holes */ 3473 #if defined(PETSC_HAVE_MPI_EXSCAN) 3474 start = 0; 3475 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3476 #else 3477 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3478 start = start-nlocals; 3479 #endif 3480 3481 if (N_n) { /* compute total size of new subset if requested */ 3482 *N_n = start + nlocals; 3483 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3484 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3485 } 3486 3487 /* adapt root data with cumulative */ 3488 if (first_found) { 3489 PetscInt old_index; 3490 3491 root_data[first_index] += start; 3492 old_index = first_index; 3493 for (i=first_index+1;i<Nl;i++) { 3494 if (root_data[i]) { 3495 root_data[i] += root_data[old_index]; 3496 old_index = i; 3497 } 3498 } 3499 } 3500 3501 /* from roots to leaves */ 3502 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3503 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3504 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3505 3506 /* create new IS with global indexes without holes */ 3507 if (subset_mult) { 3508 const PetscInt* idxs_mult; 3509 PetscInt cum; 3510 3511 cum = 0; 3512 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3513 for (i=0;i<n;i++) { 3514 PetscInt j; 3515 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3516 } 3517 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3518 } else { 3519 for (i=0;i<n;i++) { 3520 gidxs[i] = leaf_data[i]-1; 3521 } 3522 } 3523 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3524 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3525 PetscFunctionReturn(0); 3526 } 3527 3528 #undef __FUNCT__ 3529 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3530 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3531 { 3532 PetscInt i,j; 3533 PetscScalar *alphas; 3534 PetscErrorCode ierr; 3535 3536 PetscFunctionBegin; 3537 /* this implements stabilized Gram-Schmidt */ 3538 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3539 for (i=0;i<n;i++) { 3540 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3541 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3542 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3543 } 3544 ierr = PetscFree(alphas);CHKERRQ(ierr); 3545 PetscFunctionReturn(0); 3546 } 3547 3548 #undef __FUNCT__ 3549 #define __FUNCT__ "MatISGetSubassemblingPattern" 3550 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3551 { 3552 IS ranks_send_to; 3553 PetscInt n_neighs,*neighs,*n_shared,**shared; 3554 PetscMPIInt size,rank,color; 3555 PetscInt *xadj,*adjncy; 3556 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3557 PetscInt i,local_size,threshold=0; 3558 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3559 PetscSubcomm subcomm; 3560 PetscErrorCode ierr; 3561 3562 PetscFunctionBegin; 3563 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3564 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3565 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3566 3567 /* Get info on mapping */ 3568 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3569 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3570 3571 /* build local CSR graph of subdomains' connectivity */ 3572 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3573 xadj[0] = 0; 3574 xadj[1] = PetscMax(n_neighs-1,0); 3575 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3576 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3577 3578 if (threshold) { 3579 PetscInt xadj_count = 0; 3580 for (i=1;i<n_neighs;i++) { 3581 if (n_shared[i] > threshold) { 3582 adjncy[xadj_count] = neighs[i]; 3583 adjncy_wgt[xadj_count] = n_shared[i]; 3584 xadj_count++; 3585 } 3586 } 3587 xadj[1] = xadj_count; 3588 } else { 3589 if (xadj[1]) { 3590 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3591 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3592 } 3593 } 3594 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3595 if (use_square) { 3596 for (i=0;i<xadj[1];i++) { 3597 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3598 } 3599 } 3600 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3601 3602 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3603 3604 /* 3605 Restrict work on active processes only. 3606 */ 3607 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3608 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3609 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3610 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3611 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3612 if (color) { 3613 ierr = PetscFree(xadj);CHKERRQ(ierr); 3614 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3615 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3616 } else { 3617 Mat subdomain_adj; 3618 IS new_ranks,new_ranks_contig; 3619 MatPartitioning partitioner; 3620 PetscInt prank,rstart=0,rend=0; 3621 PetscInt *is_indices,*oldranks; 3622 PetscBool aggregate; 3623 3624 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3625 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3626 prank = rank; 3627 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3628 /* 3629 for (i=0;i<size;i++) { 3630 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3631 } 3632 */ 3633 for (i=0;i<xadj[1];i++) { 3634 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3635 } 3636 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3637 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3638 if (aggregate) { 3639 PetscInt lrows,row,ncols,*cols; 3640 PetscMPIInt nrank; 3641 PetscScalar *vals; 3642 3643 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3644 lrows = 0; 3645 if (nrank<redprocs) { 3646 lrows = size/redprocs; 3647 if (nrank<size%redprocs) lrows++; 3648 } 3649 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3650 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3651 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3652 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3653 row = nrank; 3654 ncols = xadj[1]-xadj[0]; 3655 cols = adjncy; 3656 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3657 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3658 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3659 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3660 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3661 ierr = PetscFree(xadj);CHKERRQ(ierr); 3662 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3663 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3664 ierr = PetscFree(vals);CHKERRQ(ierr); 3665 } else { 3666 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3667 } 3668 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3669 3670 /* Partition */ 3671 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3672 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3673 if (use_vwgt) { 3674 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3675 v_wgt[0] = local_size; 3676 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3677 } 3678 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3679 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3680 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3681 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3682 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3683 3684 /* renumber new_ranks to avoid "holes" in new set of processors */ 3685 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3686 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3687 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3688 if (!redprocs) { 3689 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3690 } else { 3691 PetscInt idxs[1]; 3692 PetscMPIInt tag; 3693 MPI_Request *reqs; 3694 3695 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3696 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3697 for (i=rstart;i<rend;i++) { 3698 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3699 } 3700 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3701 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3702 ierr = PetscFree(reqs);CHKERRQ(ierr); 3703 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3704 } 3705 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3706 /* clean up */ 3707 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3708 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3709 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3710 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3711 } 3712 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3713 3714 /* assemble parallel IS for sends */ 3715 i = 1; 3716 if (color) i=0; 3717 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3718 /* get back IS */ 3719 *is_sends = ranks_send_to; 3720 PetscFunctionReturn(0); 3721 } 3722 3723 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3724 3725 #undef __FUNCT__ 3726 #define __FUNCT__ "MatISSubassemble" 3727 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3728 { 3729 Mat local_mat; 3730 IS is_sends_internal; 3731 PetscInt rows,cols,new_local_rows; 3732 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3733 PetscBool ismatis,isdense,newisdense,destroy_mat; 3734 ISLocalToGlobalMapping l2gmap; 3735 PetscInt* l2gmap_indices; 3736 const PetscInt* is_indices; 3737 MatType new_local_type; 3738 /* buffers */ 3739 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3740 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3741 PetscInt *recv_buffer_idxs_local; 3742 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3743 /* MPI */ 3744 MPI_Comm comm,comm_n; 3745 PetscSubcomm subcomm; 3746 PetscMPIInt n_sends,n_recvs,commsize; 3747 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3748 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3749 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3750 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3751 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3752 PetscErrorCode ierr; 3753 3754 PetscFunctionBegin; 3755 /* TODO: add missing checks */ 3756 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3757 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3758 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3759 PetscValidLogicalCollectiveInt(mat,nis,7); 3760 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3761 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3762 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3763 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3764 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3765 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3766 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3767 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3768 PetscInt mrows,mcols,mnrows,mncols; 3769 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3770 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3771 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3772 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3773 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3774 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3775 } 3776 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3777 PetscValidLogicalCollectiveInt(mat,bs,0); 3778 /* prepare IS for sending if not provided */ 3779 if (!is_sends) { 3780 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3781 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3782 } else { 3783 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3784 is_sends_internal = is_sends; 3785 } 3786 3787 /* get comm */ 3788 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3789 3790 /* compute number of sends */ 3791 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3792 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3793 3794 /* compute number of receives */ 3795 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3796 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3797 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3798 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3799 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3800 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3801 ierr = PetscFree(iflags);CHKERRQ(ierr); 3802 3803 /* restrict comm if requested */ 3804 subcomm = 0; 3805 destroy_mat = PETSC_FALSE; 3806 if (restrict_comm) { 3807 PetscMPIInt color,subcommsize; 3808 3809 color = 0; 3810 if (restrict_full) { 3811 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3812 } else { 3813 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3814 } 3815 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3816 subcommsize = commsize - subcommsize; 3817 /* check if reuse has been requested */ 3818 if (reuse == MAT_REUSE_MATRIX) { 3819 if (*mat_n) { 3820 PetscMPIInt subcommsize2; 3821 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3822 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3823 comm_n = PetscObjectComm((PetscObject)*mat_n); 3824 } else { 3825 comm_n = PETSC_COMM_SELF; 3826 } 3827 } else { /* MAT_INITIAL_MATRIX */ 3828 PetscMPIInt rank; 3829 3830 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3831 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3832 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3833 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3834 comm_n = PetscSubcommChild(subcomm); 3835 } 3836 /* flag to destroy *mat_n if not significative */ 3837 if (color) destroy_mat = PETSC_TRUE; 3838 } else { 3839 comm_n = comm; 3840 } 3841 3842 /* prepare send/receive buffers */ 3843 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3844 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3845 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3846 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3847 if (nis) { 3848 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3849 } 3850 3851 /* Get data from local matrices */ 3852 if (!isdense) { 3853 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3854 /* TODO: See below some guidelines on how to prepare the local buffers */ 3855 /* 3856 send_buffer_vals should contain the raw values of the local matrix 3857 send_buffer_idxs should contain: 3858 - MatType_PRIVATE type 3859 - PetscInt size_of_l2gmap 3860 - PetscInt global_row_indices[size_of_l2gmap] 3861 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3862 */ 3863 } else { 3864 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3865 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3866 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3867 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3868 send_buffer_idxs[1] = i; 3869 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3870 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3871 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3872 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3873 for (i=0;i<n_sends;i++) { 3874 ilengths_vals[is_indices[i]] = len*len; 3875 ilengths_idxs[is_indices[i]] = len+2; 3876 } 3877 } 3878 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3879 /* additional is (if any) */ 3880 if (nis) { 3881 PetscMPIInt psum; 3882 PetscInt j; 3883 for (j=0,psum=0;j<nis;j++) { 3884 PetscInt plen; 3885 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3886 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3887 psum += len+1; /* indices + lenght */ 3888 } 3889 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3890 for (j=0,psum=0;j<nis;j++) { 3891 PetscInt plen; 3892 const PetscInt *is_array_idxs; 3893 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3894 send_buffer_idxs_is[psum] = plen; 3895 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3896 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3897 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3898 psum += plen+1; /* indices + lenght */ 3899 } 3900 for (i=0;i<n_sends;i++) { 3901 ilengths_idxs_is[is_indices[i]] = psum; 3902 } 3903 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3904 } 3905 3906 buf_size_idxs = 0; 3907 buf_size_vals = 0; 3908 buf_size_idxs_is = 0; 3909 for (i=0;i<n_recvs;i++) { 3910 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3911 buf_size_vals += (PetscInt)olengths_vals[i]; 3912 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3913 } 3914 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3915 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3916 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3917 3918 /* get new tags for clean communications */ 3919 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3920 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3921 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3922 3923 /* allocate for requests */ 3924 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3925 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3926 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3927 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3928 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3929 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3930 3931 /* communications */ 3932 ptr_idxs = recv_buffer_idxs; 3933 ptr_vals = recv_buffer_vals; 3934 ptr_idxs_is = recv_buffer_idxs_is; 3935 for (i=0;i<n_recvs;i++) { 3936 source_dest = onodes[i]; 3937 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3938 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3939 ptr_idxs += olengths_idxs[i]; 3940 ptr_vals += olengths_vals[i]; 3941 if (nis) { 3942 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3943 ptr_idxs_is += olengths_idxs_is[i]; 3944 } 3945 } 3946 for (i=0;i<n_sends;i++) { 3947 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3948 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3949 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3950 if (nis) { 3951 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3952 } 3953 } 3954 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3955 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3956 3957 /* assemble new l2g map */ 3958 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3959 ptr_idxs = recv_buffer_idxs; 3960 new_local_rows = 0; 3961 for (i=0;i<n_recvs;i++) { 3962 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3963 ptr_idxs += olengths_idxs[i]; 3964 } 3965 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3966 ptr_idxs = recv_buffer_idxs; 3967 new_local_rows = 0; 3968 for (i=0;i<n_recvs;i++) { 3969 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3970 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3971 ptr_idxs += olengths_idxs[i]; 3972 } 3973 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3974 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3975 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3976 3977 /* infer new local matrix type from received local matrices type */ 3978 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3979 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3980 if (n_recvs) { 3981 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3982 ptr_idxs = recv_buffer_idxs; 3983 for (i=0;i<n_recvs;i++) { 3984 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3985 new_local_type_private = MATAIJ_PRIVATE; 3986 break; 3987 } 3988 ptr_idxs += olengths_idxs[i]; 3989 } 3990 switch (new_local_type_private) { 3991 case MATDENSE_PRIVATE: 3992 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3993 new_local_type = MATSEQAIJ; 3994 bs = 1; 3995 } else { /* if I receive only 1 dense matrix */ 3996 new_local_type = MATSEQDENSE; 3997 bs = 1; 3998 } 3999 break; 4000 case MATAIJ_PRIVATE: 4001 new_local_type = MATSEQAIJ; 4002 bs = 1; 4003 break; 4004 case MATBAIJ_PRIVATE: 4005 new_local_type = MATSEQBAIJ; 4006 break; 4007 case MATSBAIJ_PRIVATE: 4008 new_local_type = MATSEQSBAIJ; 4009 break; 4010 default: 4011 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4012 break; 4013 } 4014 } else { /* by default, new_local_type is seqdense */ 4015 new_local_type = MATSEQDENSE; 4016 bs = 1; 4017 } 4018 4019 /* create MATIS object if needed */ 4020 if (reuse == MAT_INITIAL_MATRIX) { 4021 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4022 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4023 } else { 4024 /* it also destroys the local matrices */ 4025 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4026 } 4027 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4028 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4029 4030 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4031 4032 /* Global to local map of received indices */ 4033 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4034 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4035 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4036 4037 /* restore attributes -> type of incoming data and its size */ 4038 buf_size_idxs = 0; 4039 for (i=0;i<n_recvs;i++) { 4040 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4041 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4042 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4043 } 4044 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4045 4046 /* set preallocation */ 4047 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4048 if (!newisdense) { 4049 PetscInt *new_local_nnz=0; 4050 4051 ptr_vals = recv_buffer_vals; 4052 ptr_idxs = recv_buffer_idxs_local; 4053 if (n_recvs) { 4054 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4055 } 4056 for (i=0;i<n_recvs;i++) { 4057 PetscInt j; 4058 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4059 for (j=0;j<*(ptr_idxs+1);j++) { 4060 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4061 } 4062 } else { 4063 /* TODO */ 4064 } 4065 ptr_idxs += olengths_idxs[i]; 4066 } 4067 if (new_local_nnz) { 4068 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4069 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4070 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4071 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4072 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4073 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4074 } else { 4075 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4076 } 4077 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4078 } else { 4079 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4080 } 4081 4082 /* set values */ 4083 ptr_vals = recv_buffer_vals; 4084 ptr_idxs = recv_buffer_idxs_local; 4085 for (i=0;i<n_recvs;i++) { 4086 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4087 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4088 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4089 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4090 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4091 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4092 } else { 4093 /* TODO */ 4094 } 4095 ptr_idxs += olengths_idxs[i]; 4096 ptr_vals += olengths_vals[i]; 4097 } 4098 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4099 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4100 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4101 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4102 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4103 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4104 4105 #if 0 4106 if (!restrict_comm) { /* check */ 4107 Vec lvec,rvec; 4108 PetscReal infty_error; 4109 4110 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4111 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4112 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4113 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4114 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4115 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4116 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4117 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4118 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4119 } 4120 #endif 4121 4122 /* assemble new additional is (if any) */ 4123 if (nis) { 4124 PetscInt **temp_idxs,*count_is,j,psum; 4125 4126 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4127 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4128 ptr_idxs = recv_buffer_idxs_is; 4129 psum = 0; 4130 for (i=0;i<n_recvs;i++) { 4131 for (j=0;j<nis;j++) { 4132 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4133 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4134 psum += plen; 4135 ptr_idxs += plen+1; /* shift pointer to received data */ 4136 } 4137 } 4138 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4139 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4140 for (i=1;i<nis;i++) { 4141 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4142 } 4143 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4144 ptr_idxs = recv_buffer_idxs_is; 4145 for (i=0;i<n_recvs;i++) { 4146 for (j=0;j<nis;j++) { 4147 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4148 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4149 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4150 ptr_idxs += plen+1; /* shift pointer to received data */ 4151 } 4152 } 4153 for (i=0;i<nis;i++) { 4154 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4155 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4156 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4157 } 4158 ierr = PetscFree(count_is);CHKERRQ(ierr); 4159 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4160 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4161 } 4162 /* free workspace */ 4163 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4164 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4165 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4166 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4167 if (isdense) { 4168 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4169 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4170 } else { 4171 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4172 } 4173 if (nis) { 4174 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4175 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4176 } 4177 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4178 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4179 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4180 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4181 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4182 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4183 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4184 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4185 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4186 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4187 ierr = PetscFree(onodes);CHKERRQ(ierr); 4188 if (nis) { 4189 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4190 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4191 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4192 } 4193 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4194 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4195 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4196 for (i=0;i<nis;i++) { 4197 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4198 } 4199 *mat_n = NULL; 4200 } 4201 PetscFunctionReturn(0); 4202 } 4203 4204 /* temporary hack into ksp private data structure */ 4205 #include <petsc/private/kspimpl.h> 4206 4207 #undef __FUNCT__ 4208 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4209 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4210 { 4211 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4212 PC_IS *pcis = (PC_IS*)pc->data; 4213 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4214 MatNullSpace CoarseNullSpace=NULL; 4215 ISLocalToGlobalMapping coarse_islg; 4216 IS coarse_is,*isarray; 4217 PetscInt i,im_active=-1,active_procs=-1; 4218 PetscInt nis,nisdofs,nisneu; 4219 PC pc_temp; 4220 PCType coarse_pc_type; 4221 KSPType coarse_ksp_type; 4222 PetscBool multilevel_requested,multilevel_allowed; 4223 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4224 Mat t_coarse_mat_is; 4225 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4226 PetscMPIInt all_procs; 4227 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4228 PetscBool compute_vecs = PETSC_FALSE; 4229 PetscScalar *array; 4230 PetscErrorCode ierr; 4231 4232 PetscFunctionBegin; 4233 /* Assign global numbering to coarse dofs */ 4234 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4235 PetscInt ocoarse_size; 4236 compute_vecs = PETSC_TRUE; 4237 ocoarse_size = pcbddc->coarse_size; 4238 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4239 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4240 /* see if we can avoid some work */ 4241 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4242 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4243 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4244 PC pc; 4245 PetscBool isbddc; 4246 4247 /* temporary workaround since PCBDDC does not have a reset method so far */ 4248 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4249 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4250 if (isbddc) { 4251 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4252 } 4253 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4254 coarse_reuse = PETSC_FALSE; 4255 } else { /* we can safely reuse already computed coarse matrix */ 4256 coarse_reuse = PETSC_TRUE; 4257 } 4258 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4259 coarse_reuse = PETSC_FALSE; 4260 } 4261 /* reset any subassembling information */ 4262 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4263 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4264 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4265 coarse_reuse = PETSC_TRUE; 4266 } 4267 4268 /* count "active" (i.e. with positive local size) and "void" processes */ 4269 im_active = !!(pcis->n); 4270 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4271 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4272 void_procs = all_procs-active_procs; 4273 csin_type_simple = PETSC_TRUE; 4274 redist = PETSC_FALSE; 4275 if (pcbddc->current_level && void_procs) { 4276 csin_ml = PETSC_TRUE; 4277 ncoarse_ml = void_procs; 4278 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4279 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4280 csin_ds = PETSC_TRUE; 4281 ncoarse_ds = pcbddc->redistribute_coarse; 4282 redist = PETSC_TRUE; 4283 } else { 4284 csin_ds = PETSC_TRUE; 4285 ncoarse_ds = active_procs; 4286 redist = PETSC_TRUE; 4287 } 4288 } else { 4289 csin_ml = PETSC_FALSE; 4290 ncoarse_ml = all_procs; 4291 if (void_procs) { 4292 csin_ds = PETSC_TRUE; 4293 ncoarse_ds = void_procs; 4294 csin_type_simple = PETSC_FALSE; 4295 } else { 4296 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4297 csin_ds = PETSC_TRUE; 4298 ncoarse_ds = pcbddc->redistribute_coarse; 4299 redist = PETSC_TRUE; 4300 } else { 4301 csin_ds = PETSC_FALSE; 4302 ncoarse_ds = all_procs; 4303 } 4304 } 4305 } 4306 4307 /* 4308 test if we can go multilevel: three conditions must be satisfied: 4309 - we have not exceeded the number of levels requested 4310 - we can actually subassemble the active processes 4311 - we can find a suitable number of MPI processes where we can place the subassembled problem 4312 */ 4313 multilevel_allowed = PETSC_FALSE; 4314 multilevel_requested = PETSC_FALSE; 4315 if (pcbddc->current_level < pcbddc->max_levels) { 4316 multilevel_requested = PETSC_TRUE; 4317 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4318 multilevel_allowed = PETSC_FALSE; 4319 } else { 4320 multilevel_allowed = PETSC_TRUE; 4321 } 4322 } 4323 /* determine number of process partecipating to coarse solver */ 4324 if (multilevel_allowed) { 4325 ncoarse = ncoarse_ml; 4326 csin = csin_ml; 4327 redist = PETSC_FALSE; 4328 } else { 4329 ncoarse = ncoarse_ds; 4330 csin = csin_ds; 4331 } 4332 4333 /* creates temporary l2gmap and IS for coarse indexes */ 4334 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4335 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4336 4337 /* creates temporary MATIS object for coarse matrix */ 4338 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4339 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4340 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4341 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4342 #if 0 4343 { 4344 PetscViewer viewer; 4345 char filename[256]; 4346 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4347 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4348 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4349 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4350 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4351 } 4352 #endif 4353 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4354 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4355 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4356 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4357 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4358 4359 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4360 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4361 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4362 const PetscInt *idxs; 4363 ISLocalToGlobalMapping tmap; 4364 4365 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4366 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4367 /* allocate space for temporary storage */ 4368 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4369 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4370 /* allocate for IS array */ 4371 nisdofs = pcbddc->n_ISForDofsLocal; 4372 nisneu = !!pcbddc->NeumannBoundariesLocal; 4373 nis = nisdofs + nisneu; 4374 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4375 /* dofs splitting */ 4376 for (i=0;i<nisdofs;i++) { 4377 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4378 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4379 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4380 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4381 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4382 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4383 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4384 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4385 } 4386 /* neumann boundaries */ 4387 if (pcbddc->NeumannBoundariesLocal) { 4388 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4389 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4390 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4391 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4392 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4393 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4394 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4395 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4396 } 4397 /* free memory */ 4398 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4399 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4400 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4401 } else { 4402 nis = 0; 4403 nisdofs = 0; 4404 nisneu = 0; 4405 isarray = NULL; 4406 } 4407 /* destroy no longer needed map */ 4408 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4409 4410 /* restrict on coarse candidates (if needed) */ 4411 coarse_mat_is = NULL; 4412 if (csin) { 4413 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4414 if (redist) { 4415 PetscMPIInt rank; 4416 PetscInt spc,n_spc_p1,dest[1],destsize; 4417 4418 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4419 spc = active_procs/ncoarse; 4420 n_spc_p1 = active_procs%ncoarse; 4421 if (im_active) { 4422 destsize = 1; 4423 if (rank > n_spc_p1*(spc+1)-1) { 4424 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4425 } else { 4426 dest[0] = rank/(spc+1); 4427 } 4428 } else { 4429 destsize = 0; 4430 } 4431 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4432 } else if (csin_type_simple) { 4433 PetscMPIInt rank; 4434 PetscInt issize,isidx; 4435 4436 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4437 if (im_active) { 4438 issize = 1; 4439 isidx = (PetscInt)rank; 4440 } else { 4441 issize = 0; 4442 isidx = -1; 4443 } 4444 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4445 } else { /* get a suitable subassembling pattern from MATIS code */ 4446 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4447 } 4448 4449 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4450 if (!redist || ncoarse <= void_procs) { 4451 PetscInt ncoarse_cand,tissize,*nisindices; 4452 PetscInt *coarse_candidates; 4453 const PetscInt* tisindices; 4454 4455 /* get coarse candidates' ranks in pc communicator */ 4456 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4457 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4458 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4459 if (!coarse_candidates[i]) { 4460 coarse_candidates[ncoarse_cand++]=i; 4461 } 4462 } 4463 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4464 4465 4466 if (pcbddc->dbg_flag) { 4467 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4468 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4469 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4470 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4471 for (i=0;i<ncoarse_cand;i++) { 4472 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4473 } 4474 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4475 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4476 } 4477 /* shift the pattern on coarse candidates */ 4478 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4479 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4480 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4481 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4482 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4483 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4484 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4485 } 4486 if (pcbddc->dbg_flag) { 4487 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4489 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4490 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4491 } 4492 } 4493 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4494 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4495 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4496 } else { /* this is the last level, so use just receiving processes in subcomm */ 4497 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4498 } 4499 } else { 4500 if (pcbddc->dbg_flag) { 4501 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4502 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4503 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4504 } 4505 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4506 coarse_mat_is = t_coarse_mat_is; 4507 } 4508 4509 /* create local to global scatters for coarse problem */ 4510 if (compute_vecs) { 4511 PetscInt lrows; 4512 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4513 if (coarse_mat_is) { 4514 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4515 } else { 4516 lrows = 0; 4517 } 4518 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4519 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4520 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4521 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4522 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4523 } 4524 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4525 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4526 4527 /* set defaults for coarse KSP and PC */ 4528 if (multilevel_allowed) { 4529 coarse_ksp_type = KSPRICHARDSON; 4530 coarse_pc_type = PCBDDC; 4531 } else { 4532 coarse_ksp_type = KSPPREONLY; 4533 coarse_pc_type = PCREDUNDANT; 4534 } 4535 4536 /* print some info if requested */ 4537 if (pcbddc->dbg_flag) { 4538 if (!multilevel_allowed) { 4539 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4540 if (multilevel_requested) { 4541 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4542 } else if (pcbddc->max_levels) { 4543 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4544 } 4545 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4546 } 4547 } 4548 4549 /* create the coarse KSP object only once with defaults */ 4550 if (coarse_mat_is) { 4551 MatReuse coarse_mat_reuse; 4552 PetscViewer dbg_viewer = NULL; 4553 if (pcbddc->dbg_flag) { 4554 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4555 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4556 } 4557 if (!pcbddc->coarse_ksp) { 4558 char prefix[256],str_level[16]; 4559 size_t len; 4560 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4561 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4562 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4563 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4564 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4565 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4566 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4567 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4568 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4569 /* prefix */ 4570 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4571 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4572 if (!pcbddc->current_level) { 4573 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4574 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4575 } else { 4576 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4577 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4578 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4579 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4580 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4581 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4582 } 4583 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4584 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4585 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4586 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4587 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4588 /* allow user customization */ 4589 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4590 } 4591 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4592 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4593 if (nisdofs) { 4594 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4595 for (i=0;i<nisdofs;i++) { 4596 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4597 } 4598 } 4599 if (nisneu) { 4600 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4601 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4602 } 4603 4604 /* get some info after set from options */ 4605 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4606 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4607 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4608 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4609 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4610 isbddc = PETSC_FALSE; 4611 } 4612 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4613 if (isredundant) { 4614 KSP inner_ksp; 4615 PC inner_pc; 4616 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4617 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4618 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4619 } 4620 4621 /* assemble coarse matrix */ 4622 if (coarse_reuse) { 4623 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4624 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4625 coarse_mat_reuse = MAT_REUSE_MATRIX; 4626 } else { 4627 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4628 } 4629 if (isbddc || isnn) { 4630 if (pcbddc->coarsening_ratio > 1) { 4631 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4632 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4633 if (pcbddc->dbg_flag) { 4634 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4635 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4636 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4637 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4638 } 4639 } 4640 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4641 } else { 4642 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4643 coarse_mat = coarse_mat_is; 4644 } 4645 } else { 4646 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4647 } 4648 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4649 4650 /* propagate symmetry info of coarse matrix */ 4651 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4652 if (pc->pmat->symmetric_set) { 4653 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4654 } 4655 if (pc->pmat->hermitian_set) { 4656 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4657 } 4658 if (pc->pmat->spd_set) { 4659 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4660 } 4661 /* set operators */ 4662 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4663 if (pcbddc->dbg_flag) { 4664 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4665 } 4666 } else { /* processes non partecipating to coarse solver (if any) */ 4667 coarse_mat = 0; 4668 } 4669 ierr = PetscFree(isarray);CHKERRQ(ierr); 4670 #if 0 4671 { 4672 PetscViewer viewer; 4673 char filename[256]; 4674 sprintf(filename,"coarse_mat.m"); 4675 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4676 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4677 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4678 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4679 } 4680 #endif 4681 4682 /* Compute coarse null space (special handling by BDDC only) */ 4683 #if 0 4684 if (pcbddc->NullSpace) { 4685 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4686 } 4687 #endif 4688 4689 if (pcbddc->coarse_ksp) { 4690 Vec crhs,csol; 4691 PetscBool ispreonly; 4692 4693 if (CoarseNullSpace) { 4694 if (isbddc) { 4695 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4696 } else { 4697 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4698 } 4699 } 4700 /* setup coarse ksp */ 4701 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4702 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4703 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4704 /* hack */ 4705 if (!csol) { 4706 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4707 } 4708 if (!crhs) { 4709 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4710 } 4711 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4712 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4713 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4714 KSP check_ksp; 4715 KSPType check_ksp_type; 4716 PC check_pc; 4717 Vec check_vec,coarse_vec; 4718 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4719 PetscInt its; 4720 PetscBool compute_eigs; 4721 PetscReal *eigs_r,*eigs_c; 4722 PetscInt neigs; 4723 const char *prefix; 4724 4725 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4726 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4727 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4728 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4729 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4730 if (ispreonly) { 4731 check_ksp_type = KSPPREONLY; 4732 compute_eigs = PETSC_FALSE; 4733 } else { 4734 check_ksp_type = KSPGMRES; 4735 compute_eigs = PETSC_TRUE; 4736 } 4737 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4738 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4739 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4740 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4741 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4742 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4743 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4744 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4745 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4746 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4747 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4748 /* create random vec */ 4749 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4750 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4751 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4752 if (CoarseNullSpace) { 4753 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4754 } 4755 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4756 /* solve coarse problem */ 4757 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4758 if (CoarseNullSpace) { 4759 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4760 } 4761 /* set eigenvalue estimation if preonly has not been requested */ 4762 if (compute_eigs) { 4763 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4764 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4765 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4766 lambda_max = eigs_r[neigs-1]; 4767 lambda_min = eigs_r[0]; 4768 if (pcbddc->use_coarse_estimates) { 4769 if (lambda_max>lambda_min) { 4770 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4771 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4772 } 4773 } 4774 } 4775 4776 /* check coarse problem residual error */ 4777 if (pcbddc->dbg_flag) { 4778 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4779 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4780 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4781 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4782 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4783 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4784 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4785 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4786 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4787 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4788 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4789 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4790 if (compute_eigs) { 4791 PetscReal lambda_max_s,lambda_min_s; 4792 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4793 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4794 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4795 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4796 for (i=0;i<neigs;i++) { 4797 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4798 } 4799 } 4800 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4801 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4802 } 4803 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4804 if (compute_eigs) { 4805 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4806 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4807 } 4808 } 4809 } 4810 /* print additional info */ 4811 if (pcbddc->dbg_flag) { 4812 /* waits until all processes reaches this point */ 4813 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4814 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4815 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4816 } 4817 4818 /* free memory */ 4819 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4820 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4821 PetscFunctionReturn(0); 4822 } 4823 4824 #undef __FUNCT__ 4825 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4826 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4827 { 4828 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4829 PC_IS* pcis = (PC_IS*)pc->data; 4830 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4831 IS subset,subset_mult,subset_n; 4832 PetscInt local_size,coarse_size=0; 4833 PetscInt *local_primal_indices=NULL; 4834 const PetscInt *t_local_primal_indices; 4835 PetscErrorCode ierr; 4836 4837 PetscFunctionBegin; 4838 /* Compute global number of coarse dofs */ 4839 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4840 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4841 } 4842 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4843 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4844 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4845 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4846 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4847 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4848 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4849 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4850 if (local_size != pcbddc->local_primal_size) { 4851 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4852 } 4853 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4854 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4855 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4856 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4857 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4858 4859 /* check numbering */ 4860 if (pcbddc->dbg_flag) { 4861 PetscScalar coarsesum,*array,*array2; 4862 PetscInt i; 4863 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4864 4865 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4866 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4867 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4868 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4869 /* counter */ 4870 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4871 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4872 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4873 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4874 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4875 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4876 4877 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4878 for (i=0;i<pcbddc->local_primal_size;i++) { 4879 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4880 } 4881 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4882 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4883 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4884 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4885 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4886 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4887 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4888 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4889 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4890 for (i=0;i<pcis->n;i++) { 4891 if (array[i] != 0.0 && array[i] != array2[i]) { 4892 PetscInt owned = (PetscInt)(array[i]); 4893 PetscInt neigh = (PetscInt)(array2[i]); 4894 set_error = PETSC_TRUE; 4895 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4896 } 4897 } 4898 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4899 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4900 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4901 for (i=0;i<pcis->n;i++) { 4902 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4903 } 4904 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4905 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4906 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4907 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4908 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4909 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4910 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4911 PetscInt *gidxs; 4912 4913 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4914 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4915 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4916 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4917 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4918 for (i=0;i<pcbddc->local_primal_size;i++) { 4919 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4920 } 4921 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4922 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4923 } 4924 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4925 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4926 } 4927 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4928 /* get back data */ 4929 *coarse_size_n = coarse_size; 4930 *local_primal_indices_n = local_primal_indices; 4931 PetscFunctionReturn(0); 4932 } 4933 4934 #undef __FUNCT__ 4935 #define __FUNCT__ "PCBDDCGlobalToLocal" 4936 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4937 { 4938 IS localis_t; 4939 PetscInt i,lsize,*idxs,n; 4940 PetscScalar *vals; 4941 PetscErrorCode ierr; 4942 4943 PetscFunctionBegin; 4944 /* get indices in local ordering exploiting local to global map */ 4945 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4946 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4947 for (i=0;i<lsize;i++) vals[i] = 1.0; 4948 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4949 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4950 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4951 if (idxs) { /* multilevel guard */ 4952 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4953 } 4954 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4955 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4956 ierr = PetscFree(vals);CHKERRQ(ierr); 4957 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4958 /* now compute set in local ordering */ 4959 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4960 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4961 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4962 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4963 for (i=0,lsize=0;i<n;i++) { 4964 if (PetscRealPart(vals[i]) > 0.5) { 4965 lsize++; 4966 } 4967 } 4968 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4969 for (i=0,lsize=0;i<n;i++) { 4970 if (PetscRealPart(vals[i]) > 0.5) { 4971 idxs[lsize++] = i; 4972 } 4973 } 4974 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4975 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4976 *localis = localis_t; 4977 PetscFunctionReturn(0); 4978 } 4979 4980 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4981 #undef __FUNCT__ 4982 #define __FUNCT__ "PCBDDCMatMult_Private" 4983 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4984 { 4985 PCBDDCChange_ctx change_ctx; 4986 PetscErrorCode ierr; 4987 4988 PetscFunctionBegin; 4989 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4990 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4991 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4992 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4993 PetscFunctionReturn(0); 4994 } 4995 4996 #undef __FUNCT__ 4997 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4998 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4999 { 5000 PCBDDCChange_ctx change_ctx; 5001 PetscErrorCode ierr; 5002 5003 PetscFunctionBegin; 5004 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5005 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5006 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5007 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5008 PetscFunctionReturn(0); 5009 } 5010 5011 #undef __FUNCT__ 5012 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5013 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5014 { 5015 PC_IS *pcis=(PC_IS*)pc->data; 5016 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5017 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5018 Mat S_j; 5019 PetscInt *used_xadj,*used_adjncy; 5020 PetscBool free_used_adj; 5021 PetscErrorCode ierr; 5022 5023 PetscFunctionBegin; 5024 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5025 free_used_adj = PETSC_FALSE; 5026 if (pcbddc->sub_schurs_layers == -1) { 5027 used_xadj = NULL; 5028 used_adjncy = NULL; 5029 } else { 5030 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5031 used_xadj = pcbddc->mat_graph->xadj; 5032 used_adjncy = pcbddc->mat_graph->adjncy; 5033 } else if (pcbddc->computed_rowadj) { 5034 used_xadj = pcbddc->mat_graph->xadj; 5035 used_adjncy = pcbddc->mat_graph->adjncy; 5036 } else { 5037 PetscBool flg_row=PETSC_FALSE; 5038 const PetscInt *xadj,*adjncy; 5039 PetscInt nvtxs; 5040 5041 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5042 if (flg_row) { 5043 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5044 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5045 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5046 free_used_adj = PETSC_TRUE; 5047 } else { 5048 pcbddc->sub_schurs_layers = -1; 5049 used_xadj = NULL; 5050 used_adjncy = NULL; 5051 } 5052 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5053 } 5054 } 5055 5056 /* setup sub_schurs data */ 5057 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5058 if (!sub_schurs->use_mumps) { 5059 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5060 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5061 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5062 } else { 5063 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5064 PetscBool isseqaij; 5065 if (!pcbddc->use_vertices && reuse_solvers) { 5066 PetscInt n_vertices; 5067 5068 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5069 reuse_solvers = (PetscBool)!n_vertices; 5070 } 5071 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5072 if (!isseqaij) { 5073 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5074 if (matis->A == pcbddc->local_mat) { 5075 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5076 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5077 } else { 5078 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5079 } 5080 } 5081 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5082 } 5083 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5084 5085 /* free adjacency */ 5086 if (free_used_adj) { 5087 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5088 } 5089 PetscFunctionReturn(0); 5090 } 5091 5092 #undef __FUNCT__ 5093 #define __FUNCT__ "PCBDDCInitSubSchurs" 5094 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5095 { 5096 PC_IS *pcis=(PC_IS*)pc->data; 5097 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5098 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5099 PCBDDCGraph graph; 5100 PetscErrorCode ierr; 5101 5102 PetscFunctionBegin; 5103 /* attach interface graph for determining subsets */ 5104 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5105 IS verticesIS,verticescomm; 5106 PetscInt vsize,*idxs; 5107 5108 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5109 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5110 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5111 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5112 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5113 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5114 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5115 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5116 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5117 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5118 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5119 /* 5120 if (pcbddc->dbg_flag) { 5121 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5122 } 5123 */ 5124 } else { 5125 graph = pcbddc->mat_graph; 5126 } 5127 5128 /* sub_schurs init */ 5129 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5130 5131 /* free graph struct */ 5132 if (pcbddc->sub_schurs_rebuild) { 5133 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5134 } 5135 PetscFunctionReturn(0); 5136 } 5137