1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 10 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 const PetscInt *idxs; 14 PetscInt nz = 0; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 if (!pcbddc->zerodiag) { 19 PetscFunctionReturn(0); 20 } else { 21 ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr); 22 if (!nz) PetscFunctionReturn(0); 23 } 24 ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr); 25 /* TODO: add error checking 26 - avoid nested pop (or push) calls. 27 - cannot push before pop. 28 */ 29 if (pop) { 30 const PetscInt *cB0_cols; 31 PetscInt cB0_ncol; 32 const PetscScalar *cB0_vals; 33 34 /* extract B_0 */ 35 ierr = MatGetRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 36 pcbddc->B0_ncol = cB0_ncol; 37 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 38 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 39 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 40 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 41 ierr = MatRestoreRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 42 /* remove rows and cols from local problem */ 43 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 44 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,idxs+nz-1,1.,NULL,NULL);CHKERRQ(ierr); 45 } else { /* push */ 46 ierr = MatSetValues(pcbddc->local_mat,1,&idxs[nz-1],pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 47 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&idxs[nz-1],pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 48 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 49 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 50 } 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "PCBDDCAdaptiveSelection" 56 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 57 { 58 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 59 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 60 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 61 PetscBLASInt *B_iwork,*B_ifail; 62 PetscScalar *work,lwork; 63 PetscScalar *St,*S,*eigv; 64 PetscScalar *Sarray,*Starray; 65 PetscReal *eigs,thresh; 66 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 67 PetscBool allocated_S_St; 68 #if defined(PETSC_USE_COMPLEX) 69 PetscReal *rwork; 70 #endif 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 if (!sub_schurs->use_mumps) { 75 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 76 } 77 78 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 79 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 80 } 81 82 if (pcbddc->dbg_flag) { 83 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 84 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 85 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 86 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 87 } 88 89 if (pcbddc->dbg_flag) { 90 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 91 } 92 93 /* max size of subsets */ 94 mss = 0; 95 for (i=0;i<sub_schurs->n_subs;i++) { 96 PetscInt subset_size; 97 98 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 99 mss = PetscMax(mss,subset_size); 100 } 101 102 /* min/max and threshold */ 103 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 104 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 105 nmax = PetscMax(nmin,nmax); 106 allocated_S_St = PETSC_FALSE; 107 if (nmin) { 108 allocated_S_St = PETSC_TRUE; 109 } 110 111 /* allocate lapack workspace */ 112 cum = cum2 = 0; 113 maxneigs = 0; 114 for (i=0;i<sub_schurs->n_subs;i++) { 115 PetscInt n,subset_size; 116 117 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 118 n = PetscMin(subset_size,nmax); 119 cum += subset_size; 120 cum2 += subset_size*n; 121 maxneigs = PetscMax(maxneigs,n); 122 } 123 if (mss) { 124 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 125 PetscBLASInt B_itype = 1; 126 PetscBLASInt B_N = mss; 127 PetscReal zero = 0.0; 128 PetscReal eps = 0.0; /* dlamch? */ 129 130 B_lwork = -1; 131 S = NULL; 132 St = NULL; 133 eigs = NULL; 134 eigv = NULL; 135 B_iwork = NULL; 136 B_ifail = NULL; 137 #if defined(PETSC_USE_COMPLEX) 138 rwork = NULL; 139 #endif 140 thresh = 1.0; 141 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 142 #if defined(PETSC_USE_COMPLEX) 143 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 144 #else 145 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 146 #endif 147 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 148 ierr = PetscFPTrapPop();CHKERRQ(ierr); 149 } else { 150 /* TODO */ 151 } 152 } else { 153 lwork = 0; 154 } 155 156 nv = 0; 157 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 158 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 159 } 160 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 161 if (allocated_S_St) { 162 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 163 } 164 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 165 #if defined(PETSC_USE_COMPLEX) 166 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 167 #endif 168 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 169 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 170 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 171 nv+cum,&pcbddc->adaptive_constraints_idxs, 172 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 173 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 174 175 maxneigs = 0; 176 cum = cum2 = cumarray = 0; 177 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 178 pcbddc->adaptive_constraints_data_ptr[0] = 0; 179 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 180 const PetscInt *idxs; 181 182 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 183 for (cum=0;cum<nv;cum++) { 184 pcbddc->adaptive_constraints_n[cum] = 1; 185 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 186 pcbddc->adaptive_constraints_data[cum] = 1.0; 187 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 188 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 189 } 190 cum2 = cum; 191 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 192 } 193 194 if (mss) { /* multilevel */ 195 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 196 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 197 } 198 199 for (i=0;i<sub_schurs->n_subs;i++) { 200 201 const PetscInt *idxs; 202 PetscReal infty = PETSC_MAX_REAL; 203 PetscInt j,subset_size,eigs_start = 0; 204 PetscBLASInt B_N; 205 PetscBool same_data = PETSC_FALSE; 206 207 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 208 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 209 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 210 if (sub_schurs->is_hermitian) { 211 PetscInt j,k; 212 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 213 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 214 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 215 } 216 for (j=0;j<subset_size;j++) { 217 for (k=j;k<subset_size;k++) { 218 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 219 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 220 } 221 } 222 } else { 223 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 224 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 225 } 226 } else { 227 S = Sarray + cumarray; 228 St = Starray + cumarray; 229 } 230 231 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 232 /* see if we can save some work */ 233 if (sub_schurs->n_subs == 1) { 234 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 235 } 236 237 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 238 B_neigs = 0; 239 } else { 240 /* Threshold: this is an heuristic for edges */ 241 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 242 243 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 244 PetscBLASInt B_itype = 1; 245 PetscBLASInt B_IL, B_IU; 246 PetscReal eps = -1.0; /* dlamch? */ 247 PetscInt nmin_s; 248 249 /* ask for eigenvalues larger than thresh */ 250 if (pcbddc->dbg_flag) { 251 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 252 } 253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 254 #if defined(PETSC_USE_COMPLEX) 255 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 256 #else 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 258 #endif 259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 260 if (B_ierr) { 261 if (B_ierr < 0 ) { 262 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 263 } else if (B_ierr <= B_N) { 264 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 265 } else { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 267 } 268 } 269 270 if (B_neigs > nmax) { 271 if (pcbddc->dbg_flag) { 272 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 273 } 274 eigs_start = B_neigs -nmax; 275 B_neigs = nmax; 276 } 277 278 nmin_s = PetscMin(nmin,B_N); 279 if (B_neigs < nmin_s) { 280 PetscBLASInt B_neigs2; 281 282 B_IU = B_N - B_neigs; 283 B_IL = B_N - nmin_s + 1; 284 if (pcbddc->dbg_flag) { 285 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 286 } 287 if (sub_schurs->is_hermitian) { 288 PetscInt j; 289 for (j=0;j<subset_size;j++) { 290 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 291 } 292 for (j=0;j<subset_size;j++) { 293 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 294 } 295 } else { 296 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 297 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 298 } 299 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 300 #if defined(PETSC_USE_COMPLEX) 301 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 302 #else 303 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 304 #endif 305 ierr = PetscFPTrapPop();CHKERRQ(ierr); 306 B_neigs += B_neigs2; 307 } 308 if (B_ierr) { 309 if (B_ierr < 0 ) { 310 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 311 } else if (B_ierr <= B_N) { 312 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 313 } else { 314 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 315 } 316 } 317 if (pcbddc->dbg_flag) { 318 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 319 for (j=0;j<B_neigs;j++) { 320 if (eigs[j] == 0.0) { 321 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 322 } else { 323 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 324 } 325 } 326 } 327 } else { 328 /* TODO */ 329 } 330 } 331 maxneigs = PetscMax(B_neigs,maxneigs); 332 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 333 if (B_neigs) { 334 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 335 336 if (pcbddc->dbg_flag > 1) { 337 PetscInt ii; 338 for (ii=0;ii<B_neigs;ii++) { 339 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 340 for (j=0;j<B_N;j++) { 341 #if defined(PETSC_USE_COMPLEX) 342 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 343 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 344 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 345 #else 346 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 347 #endif 348 } 349 } 350 } 351 #if 0 352 for (j=0;j<B_neigs;j++) { 353 PetscBLASInt Blas_N,Blas_one = 1.0; 354 PetscScalar norm; 355 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 356 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 357 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 358 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 359 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 360 } else { 361 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 362 } 363 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 364 } 365 #endif 366 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 367 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 368 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 369 cum++; 370 } 371 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 372 /* shift for next computation */ 373 cumarray += subset_size*subset_size; 374 } 375 if (pcbddc->dbg_flag) { 376 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 377 } 378 379 if (mss) { 380 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 381 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 382 /* destroy matrices (junk) */ 383 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 384 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 385 } 386 if (allocated_S_St) { 387 ierr = PetscFree2(S,St);CHKERRQ(ierr); 388 } 389 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 390 #if defined(PETSC_USE_COMPLEX) 391 ierr = PetscFree(rwork);CHKERRQ(ierr); 392 #endif 393 if (pcbddc->dbg_flag) { 394 PetscInt maxneigs_r; 395 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 397 } 398 PetscFunctionReturn(0); 399 } 400 401 #undef __FUNCT__ 402 #define __FUNCT__ "PCBDDCSetUpSolvers" 403 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 404 { 405 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 406 PetscScalar *coarse_submat_vals; 407 PetscErrorCode ierr; 408 409 PetscFunctionBegin; 410 /* Setup local scatters R_to_B and (optionally) R_to_D */ 411 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 412 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 413 414 /* Setup local neumann solver ksp_R */ 415 /* PCBDDCSetUpLocalScatters should be called first! */ 416 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 417 418 /* Change global null space passed in by the user if change of basis has been requested */ 419 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 420 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 421 } 422 423 /* 424 Setup local correction and local part of coarse basis. 425 Gives back the dense local part of the coarse matrix in column major ordering 426 */ 427 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 428 429 /* Compute total number of coarse nodes and setup coarse solver */ 430 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 431 432 /* free */ 433 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 437 #undef __FUNCT__ 438 #define __FUNCT__ "PCBDDCResetCustomization" 439 PetscErrorCode PCBDDCResetCustomization(PC pc) 440 { 441 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 442 PetscErrorCode ierr; 443 444 PetscFunctionBegin; 445 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 446 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 447 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 448 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 449 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 450 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 451 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 452 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 454 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 455 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "PCBDDCResetTopography" 461 PetscErrorCode PCBDDCResetTopography(PC pc) 462 { 463 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 464 PetscErrorCode ierr; 465 466 PetscFunctionBegin; 467 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 468 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 469 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 470 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 471 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "PCBDDCResetSolvers" 477 PetscErrorCode PCBDDCResetSolvers(PC pc) 478 { 479 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 480 PetscScalar *array; 481 PetscErrorCode ierr; 482 483 PetscFunctionBegin; 484 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 485 if (pcbddc->coarse_phi_B) { 486 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 487 ierr = PetscFree(array);CHKERRQ(ierr); 488 } 489 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 490 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 491 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 492 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 493 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 494 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 495 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 496 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 497 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 498 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 499 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 500 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 501 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 502 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 503 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 504 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 505 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 506 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 507 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 508 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 509 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 510 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 511 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 512 ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr); 513 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 514 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 515 PetscFunctionReturn(0); 516 } 517 518 #undef __FUNCT__ 519 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 520 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 521 { 522 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 523 PC_IS *pcis = (PC_IS*)pc->data; 524 VecType impVecType; 525 PetscInt n_constraints,n_R,old_size; 526 PetscErrorCode ierr; 527 528 PetscFunctionBegin; 529 if (!pcbddc->ConstraintMatrix) { 530 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 531 } 532 /* get sizes */ 533 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 534 n_R = pcis->n-pcbddc->n_vertices; 535 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 536 /* local work vectors (try to avoid unneeded work)*/ 537 /* R nodes */ 538 old_size = -1; 539 if (pcbddc->vec1_R) { 540 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 541 } 542 if (n_R != old_size) { 543 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 544 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 545 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 546 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 547 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 548 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 549 } 550 /* local primal dofs */ 551 old_size = -1; 552 if (pcbddc->vec1_P) { 553 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 554 } 555 if (pcbddc->local_primal_size != old_size) { 556 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 557 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 558 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 560 } 561 /* local explicit constraints */ 562 old_size = -1; 563 if (pcbddc->vec1_C) { 564 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 565 } 566 if (n_constraints && n_constraints != old_size) { 567 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 568 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 569 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 570 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 571 } 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "PCBDDCSetUpCorrection" 577 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 578 { 579 PetscErrorCode ierr; 580 /* pointers to pcis and pcbddc */ 581 PC_IS* pcis = (PC_IS*)pc->data; 582 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 583 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 584 /* submatrices of local problem */ 585 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 586 /* submatrices of local coarse problem */ 587 Mat S_VV,S_CV,S_VC,S_CC; 588 /* working matrices */ 589 Mat C_CR; 590 /* additional working stuff */ 591 PC pc_R; 592 Mat F; 593 PetscBool isLU,isCHOL,isILU; 594 595 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 596 PetscScalar *work; 597 PetscInt *idx_V_B; 598 PetscInt n,n_vertices,n_constraints; 599 PetscInt i,n_R,n_D,n_B; 600 PetscBool unsymmetric_check; 601 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 602 MatType impMatType; 603 /* some shortcuts to scalars */ 604 PetscScalar one=1.0,m_one=-1.0; 605 606 PetscFunctionBegin; 607 n_vertices = pcbddc->n_vertices; 608 n_constraints = pcbddc->local_primal_size-n_vertices; 609 /* Set Non-overlapping dimensions */ 610 n_B = pcis->n_B; 611 n_D = pcis->n - n_B; 612 n_R = pcis->n - n_vertices; 613 614 /* Set types for local objects needed by BDDC precondtioner */ 615 impMatType = MATSEQDENSE; 616 617 /* vertices in boundary numbering */ 618 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 619 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 620 if (i != n_vertices) { 621 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 622 } 623 624 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 625 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 626 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 627 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 628 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 629 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 630 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 631 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 632 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 633 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 634 635 unsymmetric_check = PETSC_FALSE; 636 /* allocate workspace */ 637 n = 0; 638 if (n_constraints) { 639 n += n_R*n_constraints; 640 } 641 if (n_vertices) { 642 n = PetscMax(2*n_R*n_vertices,n); 643 n = PetscMax((n_R+n_B)*n_vertices,n); 644 } 645 if (!pcbddc->symmetric_primal) { 646 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 647 unsymmetric_check = PETSC_TRUE; 648 } 649 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 650 651 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 652 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 653 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 654 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 655 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 656 if (isLU || isILU || isCHOL) { 657 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 658 } else if (sub_schurs->reuse_mumps) { 659 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 660 MatFactorType type; 661 662 F = reuse_mumps->F; 663 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 664 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 665 } else { 666 F = NULL; 667 } 668 669 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 670 if (n_constraints) { 671 Mat M1,M2,M3; 672 Mat auxmat; 673 IS is_aux; 674 PetscScalar *array,*array2; 675 676 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 677 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 678 679 /* Extract constraints on R nodes: C_{CR} */ 680 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 681 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 682 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 683 684 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 685 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 686 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 687 for (i=0;i<n_constraints;i++) { 688 const PetscScalar *row_cmat_values; 689 const PetscInt *row_cmat_indices; 690 PetscInt size_of_constraint,j; 691 692 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 693 for (j=0;j<size_of_constraint;j++) { 694 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 695 } 696 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 697 } 698 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 699 if (F) { 700 Mat B; 701 702 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 703 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 704 ierr = MatDestroy(&B);CHKERRQ(ierr); 705 } else { 706 PetscScalar *marr; 707 708 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 709 for (i=0;i<n_constraints;i++) { 710 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 711 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 712 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 713 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 714 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 715 } 716 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 717 } 718 if (!pcbddc->switch_static) { 719 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 720 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 721 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 722 for (i=0;i<n_constraints;i++) { 723 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 724 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 725 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 726 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 728 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 729 } 730 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 731 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 732 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 733 } else { 734 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 735 pcbddc->local_auxmat2 = local_auxmat2_R; 736 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 737 } 738 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 739 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 740 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 741 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 742 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 743 if (isCHOL) { 744 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 745 } else { 746 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 747 } 748 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 749 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 750 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 751 ierr = MatDestroy(&M2);CHKERRQ(ierr); 752 ierr = MatDestroy(&M3);CHKERRQ(ierr); 753 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 754 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 755 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 756 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 757 ierr = MatDestroy(&M1);CHKERRQ(ierr); 758 } 759 /* Get submatrices from subdomain matrix */ 760 if (n_vertices) { 761 IS is_aux; 762 763 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 764 IS tis; 765 766 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 767 ierr = ISSort(tis);CHKERRQ(ierr); 768 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 769 ierr = ISDestroy(&tis);CHKERRQ(ierr); 770 } else { 771 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 772 } 773 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 774 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 775 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 776 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 777 } 778 779 /* Matrix of coarse basis functions (local) */ 780 if (pcbddc->coarse_phi_B) { 781 PetscInt on_B,on_primal,on_D=n_D; 782 if (pcbddc->coarse_phi_D) { 783 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 784 } 785 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 786 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 787 PetscScalar *marray; 788 789 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 790 ierr = PetscFree(marray);CHKERRQ(ierr); 791 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 792 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 793 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 794 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 795 } 796 } 797 798 if (!pcbddc->coarse_phi_B) { 799 PetscScalar *marray; 800 801 n = n_B*pcbddc->local_primal_size; 802 if (pcbddc->switch_static || pcbddc->dbg_flag) { 803 n += n_D*pcbddc->local_primal_size; 804 } 805 if (!pcbddc->symmetric_primal) { 806 n *= 2; 807 } 808 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 809 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 810 n = n_B*pcbddc->local_primal_size; 811 if (pcbddc->switch_static || pcbddc->dbg_flag) { 812 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 813 n += n_D*pcbddc->local_primal_size; 814 } 815 if (!pcbddc->symmetric_primal) { 816 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 817 if (pcbddc->switch_static || pcbddc->dbg_flag) { 818 n = n_B*pcbddc->local_primal_size; 819 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 820 } 821 } else { 822 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 823 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 824 if (pcbddc->switch_static || pcbddc->dbg_flag) { 825 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 826 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 827 } 828 } 829 } 830 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 831 /* vertices */ 832 if (n_vertices) { 833 834 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 835 836 if (n_R) { 837 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 838 PetscBLASInt B_N,B_one = 1; 839 PetscScalar *x,*y; 840 PetscBool isseqaij; 841 842 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 843 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 844 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 845 if (F) { /* TODO could be optimized for symmetric problems */ 846 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 847 } else { 848 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 849 for (i=0;i<n_vertices;i++) { 850 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 851 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 852 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 853 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 854 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 855 } 856 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 857 } 858 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 859 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 860 if (n_constraints) { 861 Mat B; 862 863 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 864 for (i=0;i<n_vertices;i++) { 865 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 866 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 867 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 870 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 871 } 872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 873 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 874 ierr = MatDestroy(&B);CHKERRQ(ierr); 875 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 876 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 877 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 878 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 879 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 880 ierr = MatDestroy(&B);CHKERRQ(ierr); 881 } 882 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 883 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 884 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 885 } 886 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 887 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 888 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 889 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 890 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 891 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 892 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 893 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 894 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 895 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 896 } else { 897 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 898 } 899 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 900 /* coarse basis functions */ 901 for (i=0;i<n_vertices;i++) { 902 PetscScalar *y; 903 904 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 905 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 906 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 907 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 y[n_B*i+idx_V_B[i]] = 1.0; 910 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 911 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 912 913 if (pcbddc->switch_static || pcbddc->dbg_flag) { 914 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 915 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 916 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 918 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 919 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 920 } 921 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 922 } 923 /* if n_R == 0 the object is not destroyed */ 924 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 925 } 926 927 if (n_constraints) { 928 Mat B; 929 930 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 931 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 932 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 933 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 934 if (n_vertices) { 935 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 936 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 937 } else { 938 Mat S_VCt; 939 940 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 941 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 942 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 943 } 944 } 945 ierr = MatDestroy(&B);CHKERRQ(ierr); 946 /* coarse basis functions */ 947 for (i=0;i<n_constraints;i++) { 948 PetscScalar *y; 949 950 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 951 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 952 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 953 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 954 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 956 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 957 if (pcbddc->switch_static || pcbddc->dbg_flag) { 958 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 959 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 960 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 963 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 964 } 965 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 966 } 967 } 968 if (n_constraints) { 969 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 970 } 971 972 /* compute other basis functions for non-symmetric problems */ 973 if (!pcbddc->symmetric_primal) { 974 975 if (n_constraints) { 976 Mat S_CCT,B_C; 977 978 /* this is a lazy thing */ 979 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 980 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 981 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 982 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 983 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 984 if (n_vertices) { 985 Mat B_V,S_VCT; 986 987 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 988 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 989 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 990 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 991 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 992 } 993 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 994 } else { /* if there are no constraints, reset work */ 995 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 996 } 997 if (n_vertices && n_R) { 998 Mat A_VRT; 999 PetscScalar *marray; 1000 PetscBLASInt B_N,B_one = 1; 1001 1002 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1003 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1004 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1005 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1006 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1007 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1008 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1009 } 1010 1011 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1012 for (i=0;i<pcbddc->local_primal_size;i++) { 1013 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1014 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1015 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1016 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1017 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1018 } 1019 } else { 1020 for (i=0;i<pcbddc->local_primal_size;i++) { 1021 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1022 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1023 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1024 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1025 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1026 } 1027 } 1028 /* coarse basis functions */ 1029 for (i=0;i<pcbddc->local_primal_size;i++) { 1030 PetscScalar *y; 1031 1032 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1033 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1034 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1035 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 if (i<n_vertices) { 1038 y[n_B*i+idx_V_B[i]] = 1.0; 1039 } 1040 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1041 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1042 1043 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1044 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1045 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1046 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1047 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1048 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1049 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1050 } 1051 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1052 } 1053 } 1054 /* free memory */ 1055 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1056 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1057 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1058 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1059 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1060 ierr = PetscFree(work);CHKERRQ(ierr); 1061 if (n_vertices) { 1062 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1063 } 1064 if (n_constraints) { 1065 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1066 } 1067 /* Checking coarse_sub_mat and coarse basis functios */ 1068 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1069 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1070 if (pcbddc->dbg_flag) { 1071 Mat coarse_sub_mat; 1072 Mat AUXMAT,TM1,TM2,TM3,TM4; 1073 Mat coarse_phi_D,coarse_phi_B; 1074 Mat coarse_psi_D,coarse_psi_B; 1075 Mat A_II,A_BB,A_IB,A_BI; 1076 Mat C_B,CPHI; 1077 IS is_dummy; 1078 Vec mones; 1079 MatType checkmattype=MATSEQAIJ; 1080 PetscReal real_value; 1081 1082 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1083 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1084 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1085 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1086 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1087 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1088 if (unsymmetric_check) { 1089 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1090 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1091 } 1092 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1093 1094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1095 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1096 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1097 if (unsymmetric_check) { 1098 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1099 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1100 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1101 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1102 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1103 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1104 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1105 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1106 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1107 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1108 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1109 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1110 } else { 1111 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1112 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1113 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1114 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1115 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1116 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1117 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1118 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1119 } 1120 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1121 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1122 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1123 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1124 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1125 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1126 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1127 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1128 1129 /* check constraints */ 1130 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1131 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1132 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1133 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1134 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1135 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1136 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1137 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1138 if (unsymmetric_check) { 1139 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1140 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1141 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1142 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1144 } 1145 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1146 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1147 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1148 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1149 1150 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1151 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1152 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1153 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1154 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1155 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1156 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1157 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1158 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1159 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1160 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1161 if (unsymmetric_check) { 1162 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1163 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1164 } 1165 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1166 } 1167 /* get back data */ 1168 *coarse_submat_vals_n = coarse_submat_vals; 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1174 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1175 { 1176 Mat *work_mat; 1177 IS isrow_s,iscol_s; 1178 PetscBool rsorted,csorted; 1179 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1180 PetscErrorCode ierr; 1181 1182 PetscFunctionBegin; 1183 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1184 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1185 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1186 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1187 1188 if (!rsorted) { 1189 const PetscInt *idxs; 1190 PetscInt *idxs_sorted,i; 1191 1192 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1193 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1194 for (i=0;i<rsize;i++) { 1195 idxs_perm_r[i] = i; 1196 } 1197 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1198 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1199 for (i=0;i<rsize;i++) { 1200 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1201 } 1202 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1203 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1204 } else { 1205 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1206 isrow_s = isrow; 1207 } 1208 1209 if (!csorted) { 1210 if (isrow == iscol) { 1211 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1212 iscol_s = isrow_s; 1213 } else { 1214 const PetscInt *idxs; 1215 PetscInt *idxs_sorted,i; 1216 1217 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1218 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1219 for (i=0;i<csize;i++) { 1220 idxs_perm_c[i] = i; 1221 } 1222 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1223 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1224 for (i=0;i<csize;i++) { 1225 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1226 } 1227 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1228 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1229 } 1230 } else { 1231 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1232 iscol_s = iscol; 1233 } 1234 1235 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1236 1237 if (!rsorted || !csorted) { 1238 Mat new_mat; 1239 IS is_perm_r,is_perm_c; 1240 1241 if (!rsorted) { 1242 PetscInt *idxs_r,i; 1243 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1244 for (i=0;i<rsize;i++) { 1245 idxs_r[idxs_perm_r[i]] = i; 1246 } 1247 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1248 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1249 } else { 1250 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1251 } 1252 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1253 1254 if (!csorted) { 1255 if (isrow_s == iscol_s) { 1256 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1257 is_perm_c = is_perm_r; 1258 } else { 1259 PetscInt *idxs_c,i; 1260 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1261 for (i=0;i<csize;i++) { 1262 idxs_c[idxs_perm_c[i]] = i; 1263 } 1264 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1265 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1266 } 1267 } else { 1268 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1269 } 1270 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1271 1272 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1273 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1274 work_mat[0] = new_mat; 1275 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1276 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1277 } 1278 1279 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1280 *B = work_mat[0]; 1281 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1282 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1283 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1289 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1290 { 1291 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1292 PC_IS* pcis = (PC_IS*)pc->data; 1293 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1294 Mat new_mat; 1295 IS is_local,is_global; 1296 PetscInt local_size; 1297 PetscBool isseqaij; 1298 PetscErrorCode ierr; 1299 1300 PetscFunctionBegin; 1301 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1302 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1303 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1304 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1305 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1306 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1307 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1308 1309 /* check */ 1310 if (pcbddc->dbg_flag) { 1311 Vec x,x_change; 1312 PetscReal error; 1313 1314 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1315 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1316 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1317 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1318 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1319 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1320 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1321 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1322 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1323 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1324 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1325 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1326 ierr = VecDestroy(&x);CHKERRQ(ierr); 1327 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1328 } 1329 1330 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1331 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1332 if (isseqaij) { 1333 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1334 } else { 1335 Mat work_mat; 1336 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1337 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1338 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1339 } 1340 if (matis->A->symmetric_set) { 1341 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1342 #if !defined(PETSC_USE_COMPLEX) 1343 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1344 #endif 1345 } 1346 /* 1347 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1348 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1349 */ 1350 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1356 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1357 { 1358 PC_IS* pcis = (PC_IS*)(pc->data); 1359 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1360 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1361 PetscInt *idx_R_local=NULL; 1362 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1363 PetscInt vbs,bs; 1364 PetscBT bitmask=NULL; 1365 PetscErrorCode ierr; 1366 1367 PetscFunctionBegin; 1368 /* 1369 No need to setup local scatters if 1370 - primal space is unchanged 1371 AND 1372 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1373 AND 1374 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1375 */ 1376 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1377 PetscFunctionReturn(0); 1378 } 1379 /* destroy old objects */ 1380 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1381 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1382 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1383 /* Set Non-overlapping dimensions */ 1384 n_B = pcis->n_B; 1385 n_D = pcis->n - n_B; 1386 n_vertices = pcbddc->n_vertices; 1387 1388 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1389 1390 /* create auxiliary bitmask and allocate workspace */ 1391 if (!sub_schurs->reuse_mumps) { 1392 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1393 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1394 for (i=0;i<n_vertices;i++) { 1395 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1396 } 1397 1398 for (i=0, n_R=0; i<pcis->n; i++) { 1399 if (!PetscBTLookup(bitmask,i)) { 1400 idx_R_local[n_R++] = i; 1401 } 1402 } 1403 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1404 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1405 1406 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1407 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1408 } 1409 1410 /* Block code */ 1411 vbs = 1; 1412 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1413 if (bs>1 && !(n_vertices%bs)) { 1414 PetscBool is_blocked = PETSC_TRUE; 1415 PetscInt *vary; 1416 if (!sub_schurs->reuse_mumps) { 1417 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1418 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1419 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1420 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1421 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1422 for (i=0; i<pcis->n/bs; i++) { 1423 if (vary[i]!=0 && vary[i]!=bs) { 1424 is_blocked = PETSC_FALSE; 1425 break; 1426 } 1427 } 1428 ierr = PetscFree(vary);CHKERRQ(ierr); 1429 } else { 1430 /* Verify directly the R set */ 1431 for (i=0; i<n_R/bs; i++) { 1432 PetscInt j,node=idx_R_local[bs*i]; 1433 for (j=1; j<bs; j++) { 1434 if (node != idx_R_local[bs*i+j]-j) { 1435 is_blocked = PETSC_FALSE; 1436 break; 1437 } 1438 } 1439 } 1440 } 1441 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1442 vbs = bs; 1443 for (i=0;i<n_R/vbs;i++) { 1444 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1445 } 1446 } 1447 } 1448 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1449 if (sub_schurs->reuse_mumps) { 1450 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1451 1452 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1453 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1454 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1455 reuse_mumps->is_R = pcbddc->is_R_local; 1456 } else { 1457 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1458 } 1459 1460 /* print some info if requested */ 1461 if (pcbddc->dbg_flag) { 1462 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1463 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1465 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1468 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1469 } 1470 1471 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1472 if (!sub_schurs->reuse_mumps) { 1473 IS is_aux1,is_aux2; 1474 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1475 1476 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1477 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1478 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1479 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1480 for (i=0; i<n_D; i++) { 1481 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1482 } 1483 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1484 for (i=0, j=0; i<n_R; i++) { 1485 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1486 aux_array1[j++] = i; 1487 } 1488 } 1489 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1490 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1491 for (i=0, j=0; i<n_B; i++) { 1492 if (!PetscBTLookup(bitmask,is_indices[i])) { 1493 aux_array2[j++] = i; 1494 } 1495 } 1496 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1497 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1498 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1499 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1500 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1501 1502 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1503 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1504 for (i=0, j=0; i<n_R; i++) { 1505 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1506 aux_array1[j++] = i; 1507 } 1508 } 1509 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1510 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1511 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1512 } 1513 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1514 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1515 } else { 1516 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1517 IS tis; 1518 PetscInt schur_size; 1519 1520 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1521 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1522 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1523 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1524 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1525 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1526 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1527 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1528 } 1529 } 1530 PetscFunctionReturn(0); 1531 } 1532 1533 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1536 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1537 { 1538 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1539 PC_IS *pcis = (PC_IS*)pc->data; 1540 PC pc_temp; 1541 Mat A_RR; 1542 MatReuse reuse; 1543 PetscScalar m_one = -1.0; 1544 PetscReal value; 1545 PetscInt n_D,n_R; 1546 PetscBool use_exact,use_exact_reduced,issbaij; 1547 PetscErrorCode ierr; 1548 /* prefixes stuff */ 1549 char dir_prefix[256],neu_prefix[256],str_level[16]; 1550 size_t len; 1551 1552 PetscFunctionBegin; 1553 1554 /* compute prefixes */ 1555 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1556 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1557 if (!pcbddc->current_level) { 1558 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1559 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1560 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1561 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1562 } else { 1563 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1564 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1565 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1566 len -= 15; /* remove "pc_bddc_coarse_" */ 1567 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1568 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1569 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1570 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1571 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1572 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1573 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1574 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1575 } 1576 1577 /* DIRICHLET PROBLEM */ 1578 if (dirichlet) { 1579 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1580 if (pcbddc->local_mat->symmetric_set) { 1581 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1582 } 1583 /* Matrix for Dirichlet problem is pcis->A_II */ 1584 n_D = pcis->n - pcis->n_B; 1585 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1586 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1587 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1588 /* default */ 1589 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1590 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1591 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1592 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1593 if (issbaij) { 1594 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1595 } else { 1596 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1597 } 1598 /* Allow user's customization */ 1599 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1600 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1601 } 1602 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1603 if (sub_schurs->reuse_mumps) { 1604 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1605 1606 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1607 } 1608 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1609 if (!n_D) { 1610 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1611 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1612 } 1613 /* Set Up KSP for Dirichlet problem of BDDC */ 1614 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1615 /* set ksp_D into pcis data */ 1616 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1617 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1618 pcis->ksp_D = pcbddc->ksp_D; 1619 } 1620 1621 /* NEUMANN PROBLEM */ 1622 A_RR = 0; 1623 if (neumann) { 1624 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1625 PetscInt ibs,mbs; 1626 PetscBool issbaij; 1627 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1628 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1629 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1630 if (pcbddc->ksp_R) { /* already created ksp */ 1631 PetscInt nn_R; 1632 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1633 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1634 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1635 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1636 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1637 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1638 reuse = MAT_INITIAL_MATRIX; 1639 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1640 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1641 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1642 reuse = MAT_INITIAL_MATRIX; 1643 } else { /* safe to reuse the matrix */ 1644 reuse = MAT_REUSE_MATRIX; 1645 } 1646 } 1647 /* last check */ 1648 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1649 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1650 reuse = MAT_INITIAL_MATRIX; 1651 } 1652 } else { /* first time, so we need to create the matrix */ 1653 reuse = MAT_INITIAL_MATRIX; 1654 } 1655 /* extract A_RR */ 1656 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1657 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1658 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1659 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1660 if (matis->A == pcbddc->local_mat) { 1661 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1662 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1663 } else { 1664 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1665 } 1666 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1667 if (matis->A == pcbddc->local_mat) { 1668 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1669 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1670 } else { 1671 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1672 } 1673 } 1674 if (!sub_schurs->reuse_mumps) { 1675 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1676 if (pcbddc->local_mat->symmetric_set) { 1677 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1678 } 1679 } else { 1680 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1681 1682 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1683 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1684 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1685 } 1686 if (!pcbddc->ksp_R) { /* create object if not present */ 1687 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1688 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1689 /* default */ 1690 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1691 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1692 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1693 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1694 if (issbaij) { 1695 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1696 } else { 1697 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1698 } 1699 /* Allow user's customization */ 1700 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1701 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1702 } 1703 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1704 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1705 if (!n_R) { 1706 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1707 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1708 } 1709 /* Reuse MUMPS solver if it is present */ 1710 if (sub_schurs->reuse_mumps) { 1711 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1712 1713 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1714 } 1715 /* Set Up KSP for Neumann problem of BDDC */ 1716 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1717 } 1718 /* free Neumann problem's matrix */ 1719 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1720 1721 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1722 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1723 if (pcbddc->dbg_flag) { 1724 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1725 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1726 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1727 } 1728 if (dirichlet) { /* Dirichlet */ 1729 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1730 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1731 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1732 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1733 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1734 /* need to be adapted? */ 1735 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1736 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1737 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1738 /* print info */ 1739 if (pcbddc->dbg_flag) { 1740 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1741 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1742 } 1743 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1744 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1745 } 1746 } 1747 if (neumann) { /* Neumann */ 1748 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1749 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1750 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1751 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1752 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1753 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1754 /* need to be adapted? */ 1755 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1756 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1757 /* print info */ 1758 if (pcbddc->dbg_flag) { 1759 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1760 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1761 } 1762 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1763 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1764 } 1765 } 1766 } 1767 PetscFunctionReturn(0); 1768 } 1769 1770 #undef __FUNCT__ 1771 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1772 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1773 { 1774 PetscErrorCode ierr; 1775 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1776 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1777 1778 PetscFunctionBegin; 1779 if (!sub_schurs->reuse_mumps) { 1780 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1781 } 1782 if (!pcbddc->switch_static) { 1783 if (applytranspose && pcbddc->local_auxmat1) { 1784 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1785 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1786 } 1787 if (!sub_schurs->reuse_mumps) { 1788 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1789 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1790 } else { 1791 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1792 1793 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1795 } 1796 } else { 1797 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1798 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1799 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1800 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1801 if (applytranspose && pcbddc->local_auxmat1) { 1802 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1803 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1804 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1805 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1806 } 1807 } 1808 if (!sub_schurs->reuse_mumps) { 1809 if (applytranspose) { 1810 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1811 } else { 1812 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1813 } 1814 #if defined(PETSC_HAVE_MUMPS) 1815 } else { 1816 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1817 1818 if (applytranspose) { 1819 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1820 } else { 1821 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1822 } 1823 #endif 1824 } 1825 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1826 if (!pcbddc->switch_static) { 1827 if (!sub_schurs->reuse_mumps) { 1828 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1829 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1830 } else { 1831 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1832 1833 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1834 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1835 } 1836 if (!applytranspose && pcbddc->local_auxmat1) { 1837 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1838 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1839 } 1840 } else { 1841 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1842 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1843 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1844 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1845 if (!applytranspose && pcbddc->local_auxmat1) { 1846 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1847 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1848 } 1849 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1850 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1851 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1852 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1853 } 1854 PetscFunctionReturn(0); 1855 } 1856 1857 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1860 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1861 { 1862 PetscErrorCode ierr; 1863 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1864 PC_IS* pcis = (PC_IS*) (pc->data); 1865 const PetscScalar zero = 0.0; 1866 1867 PetscFunctionBegin; 1868 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1869 if (applytranspose) { 1870 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1871 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1872 } else { 1873 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1874 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1875 } 1876 /* start communications from local primal nodes to rhs of coarse solver */ 1877 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1878 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1879 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1880 1881 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1882 /* TODO remove null space when doing multilevel */ 1883 if (pcbddc->coarse_ksp) { 1884 Vec rhs,sol; 1885 1886 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1887 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1888 if (applytranspose) { 1889 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1890 } else { 1891 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1892 } 1893 } 1894 1895 /* Local solution on R nodes */ 1896 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1897 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1898 } 1899 1900 /* communications from coarse sol to local primal nodes */ 1901 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1902 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1903 1904 /* Sum contributions from two levels */ 1905 if (applytranspose) { 1906 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1907 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1908 } else { 1909 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1910 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1917 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1918 { 1919 PetscErrorCode ierr; 1920 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1921 PetscScalar *array; 1922 Vec from,to; 1923 1924 PetscFunctionBegin; 1925 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1926 from = pcbddc->coarse_vec; 1927 to = pcbddc->vec1_P; 1928 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1929 Vec tvec; 1930 1931 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1932 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1933 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1934 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1935 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1936 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1937 } 1938 } else { /* from local to global -> put data in coarse right hand side */ 1939 from = pcbddc->vec1_P; 1940 to = pcbddc->coarse_vec; 1941 } 1942 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1948 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1949 { 1950 PetscErrorCode ierr; 1951 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1952 PetscScalar *array; 1953 Vec from,to; 1954 1955 PetscFunctionBegin; 1956 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1957 from = pcbddc->coarse_vec; 1958 to = pcbddc->vec1_P; 1959 } else { /* from local to global -> put data in coarse right hand side */ 1960 from = pcbddc->vec1_P; 1961 to = pcbddc->coarse_vec; 1962 } 1963 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1964 if (smode == SCATTER_FORWARD) { 1965 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1966 Vec tvec; 1967 1968 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1969 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1970 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1971 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1972 } 1973 } else { 1974 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1975 ierr = VecResetArray(from);CHKERRQ(ierr); 1976 } 1977 } 1978 PetscFunctionReturn(0); 1979 } 1980 1981 /* uncomment for testing purposes */ 1982 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1983 #undef __FUNCT__ 1984 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1985 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1986 { 1987 PetscErrorCode ierr; 1988 PC_IS* pcis = (PC_IS*)(pc->data); 1989 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1990 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1991 /* one and zero */ 1992 PetscScalar one=1.0,zero=0.0; 1993 /* space to store constraints and their local indices */ 1994 PetscScalar *constraints_data; 1995 PetscInt *constraints_idxs,*constraints_idxs_B; 1996 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1997 PetscInt *constraints_n; 1998 /* iterators */ 1999 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2000 /* BLAS integers */ 2001 PetscBLASInt lwork,lierr; 2002 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2003 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2004 /* reuse */ 2005 PetscInt olocal_primal_size,olocal_primal_size_cc; 2006 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2007 /* change of basis */ 2008 PetscBool qr_needed; 2009 PetscBT change_basis,qr_needed_idx; 2010 /* auxiliary stuff */ 2011 PetscInt *nnz,*is_indices; 2012 PetscInt ncc; 2013 /* some quantities */ 2014 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2015 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2016 2017 PetscFunctionBegin; 2018 /* Destroy Mat objects computed previously */ 2019 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2020 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2021 /* save info on constraints from previous setup (if any) */ 2022 olocal_primal_size = pcbddc->local_primal_size; 2023 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2024 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2025 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2026 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2027 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2028 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2029 2030 /* print some info */ 2031 if (pcbddc->dbg_flag) { 2032 IS vertices; 2033 PetscInt nv,nedges,nfaces; 2034 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2035 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2036 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2037 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2038 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2039 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2040 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2041 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2042 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2043 } 2044 2045 if (!pcbddc->adaptive_selection) { 2046 IS ISForVertices,*ISForFaces,*ISForEdges; 2047 MatNullSpace nearnullsp; 2048 const Vec *nearnullvecs; 2049 Vec *localnearnullsp; 2050 PetscScalar *array; 2051 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2052 PetscBool nnsp_has_cnst; 2053 /* LAPACK working arrays for SVD or POD */ 2054 PetscBool skip_lapack,boolforchange; 2055 PetscScalar *work; 2056 PetscReal *singular_vals; 2057 #if defined(PETSC_USE_COMPLEX) 2058 PetscReal *rwork; 2059 #endif 2060 #if defined(PETSC_MISSING_LAPACK_GESVD) 2061 PetscScalar *temp_basis,*correlation_mat; 2062 #else 2063 PetscBLASInt dummy_int=1; 2064 PetscScalar dummy_scalar=1.; 2065 #endif 2066 2067 /* Get index sets for faces, edges and vertices from graph */ 2068 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2069 /* free unneeded index sets */ 2070 if (!pcbddc->use_vertices) { 2071 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2072 } 2073 if (!pcbddc->use_edges) { 2074 for (i=0;i<n_ISForEdges;i++) { 2075 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2076 } 2077 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2078 n_ISForEdges = 0; 2079 } 2080 if (!pcbddc->use_faces) { 2081 for (i=0;i<n_ISForFaces;i++) { 2082 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2083 } 2084 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2085 n_ISForFaces = 0; 2086 } 2087 2088 #if defined(PETSC_USE_DEBUG) 2089 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2090 Also use_change_of_basis should be consistent among processors */ 2091 if (pcbddc->NullSpace) { 2092 PetscBool tbool[2],gbool[2]; 2093 2094 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2095 pcbddc->use_change_of_basis = PETSC_TRUE; 2096 if (!ISForEdges) { 2097 pcbddc->use_change_on_faces = PETSC_TRUE; 2098 } 2099 } 2100 tbool[0] = pcbddc->use_change_of_basis; 2101 tbool[1] = pcbddc->use_change_on_faces; 2102 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2103 pcbddc->use_change_of_basis = gbool[0]; 2104 pcbddc->use_change_on_faces = gbool[1]; 2105 } 2106 #endif 2107 2108 /* check if near null space is attached to global mat */ 2109 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2110 if (nearnullsp) { 2111 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2112 /* remove any stored info */ 2113 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2114 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2115 /* store information for BDDC solver reuse */ 2116 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2117 pcbddc->onearnullspace = nearnullsp; 2118 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2119 for (i=0;i<nnsp_size;i++) { 2120 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2121 } 2122 } else { /* if near null space is not provided BDDC uses constants by default */ 2123 nnsp_size = 0; 2124 nnsp_has_cnst = PETSC_TRUE; 2125 } 2126 /* get max number of constraints on a single cc */ 2127 max_constraints = nnsp_size; 2128 if (nnsp_has_cnst) max_constraints++; 2129 2130 /* 2131 Evaluate maximum storage size needed by the procedure 2132 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2133 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2134 There can be multiple constraints per connected component 2135 */ 2136 n_vertices = 0; 2137 if (ISForVertices) { 2138 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2139 } 2140 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2141 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2142 2143 total_counts = n_ISForFaces+n_ISForEdges; 2144 total_counts *= max_constraints; 2145 total_counts += n_vertices; 2146 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2147 2148 total_counts = 0; 2149 max_size_of_constraint = 0; 2150 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2151 IS used_is; 2152 if (i<n_ISForEdges) { 2153 used_is = ISForEdges[i]; 2154 } else { 2155 used_is = ISForFaces[i-n_ISForEdges]; 2156 } 2157 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2158 total_counts += j; 2159 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2160 } 2161 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2162 2163 /* get local part of global near null space vectors */ 2164 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2165 for (k=0;k<nnsp_size;k++) { 2166 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2167 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2168 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2169 } 2170 2171 /* whether or not to skip lapack calls */ 2172 skip_lapack = PETSC_TRUE; 2173 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2174 2175 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2176 if (!skip_lapack) { 2177 PetscScalar temp_work; 2178 2179 #if defined(PETSC_MISSING_LAPACK_GESVD) 2180 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2181 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2182 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2183 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2184 #if defined(PETSC_USE_COMPLEX) 2185 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2186 #endif 2187 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2188 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2189 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2190 lwork = -1; 2191 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2192 #if !defined(PETSC_USE_COMPLEX) 2193 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2194 #else 2195 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2196 #endif 2197 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2198 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2199 #else /* on missing GESVD */ 2200 /* SVD */ 2201 PetscInt max_n,min_n; 2202 max_n = max_size_of_constraint; 2203 min_n = max_constraints; 2204 if (max_size_of_constraint < max_constraints) { 2205 min_n = max_size_of_constraint; 2206 max_n = max_constraints; 2207 } 2208 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2209 #if defined(PETSC_USE_COMPLEX) 2210 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2211 #endif 2212 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2213 lwork = -1; 2214 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2215 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2216 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2217 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2218 #if !defined(PETSC_USE_COMPLEX) 2219 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2220 #else 2221 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2222 #endif 2223 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2224 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2225 #endif /* on missing GESVD */ 2226 /* Allocate optimal workspace */ 2227 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2228 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2229 } 2230 /* Now we can loop on constraining sets */ 2231 total_counts = 0; 2232 constraints_idxs_ptr[0] = 0; 2233 constraints_data_ptr[0] = 0; 2234 /* vertices */ 2235 if (n_vertices) { 2236 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2237 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2238 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2239 for (i=0;i<n_vertices;i++) { 2240 constraints_n[total_counts] = 1; 2241 constraints_data[total_counts] = 1.0; 2242 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2243 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2244 total_counts++; 2245 } 2246 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2247 PetscBool used_vertex; 2248 for (i=0;i<n_vertices;i++) { 2249 used_vertex = PETSC_FALSE; 2250 k = 0; 2251 while (!used_vertex && k<nnsp_size) { 2252 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2253 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2254 constraints_n[total_counts] = 1; 2255 constraints_idxs[total_counts] = is_indices[i]; 2256 constraints_data[total_counts] = 1.0; 2257 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2258 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2259 total_counts++; 2260 used_vertex = PETSC_TRUE; 2261 } 2262 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2263 k++; 2264 } 2265 } 2266 } 2267 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2268 n_vertices = total_counts; 2269 } 2270 2271 /* edges and faces */ 2272 total_counts_cc = total_counts; 2273 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2274 IS used_is; 2275 PetscBool idxs_copied = PETSC_FALSE; 2276 2277 if (ncc<n_ISForEdges) { 2278 used_is = ISForEdges[ncc]; 2279 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2280 } else { 2281 used_is = ISForFaces[ncc-n_ISForEdges]; 2282 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2283 } 2284 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2285 2286 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2287 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2288 /* change of basis should not be performed on local periodic nodes */ 2289 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2290 if (nnsp_has_cnst) { 2291 PetscScalar quad_value; 2292 2293 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2294 idxs_copied = PETSC_TRUE; 2295 2296 if (!pcbddc->use_nnsp_true) { 2297 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2298 } else { 2299 quad_value = 1.0; 2300 } 2301 for (j=0;j<size_of_constraint;j++) { 2302 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2303 } 2304 temp_constraints++; 2305 total_counts++; 2306 } 2307 for (k=0;k<nnsp_size;k++) { 2308 PetscReal real_value; 2309 PetscScalar *ptr_to_data; 2310 2311 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2312 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2313 for (j=0;j<size_of_constraint;j++) { 2314 ptr_to_data[j] = array[is_indices[j]]; 2315 } 2316 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2317 /* check if array is null on the connected component */ 2318 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2319 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2320 if (real_value > 0.0) { /* keep indices and values */ 2321 temp_constraints++; 2322 total_counts++; 2323 if (!idxs_copied) { 2324 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2325 idxs_copied = PETSC_TRUE; 2326 } 2327 } 2328 } 2329 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2330 valid_constraints = temp_constraints; 2331 if (!pcbddc->use_nnsp_true && temp_constraints) { 2332 if (temp_constraints == 1) { /* just normalize the constraint */ 2333 PetscScalar norm,*ptr_to_data; 2334 2335 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2336 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2337 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2338 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2339 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2340 } else { /* perform SVD */ 2341 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2342 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2343 2344 #if defined(PETSC_MISSING_LAPACK_GESVD) 2345 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2346 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2347 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2348 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2349 from that computed using LAPACKgesvd 2350 -> This is due to a different computation of eigenvectors in LAPACKheev 2351 -> The quality of the POD-computed basis will be the same */ 2352 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2353 /* Store upper triangular part of correlation matrix */ 2354 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2355 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2356 for (j=0;j<temp_constraints;j++) { 2357 for (k=0;k<j+1;k++) { 2358 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2359 } 2360 } 2361 /* compute eigenvalues and eigenvectors of correlation matrix */ 2362 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2363 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2364 #if !defined(PETSC_USE_COMPLEX) 2365 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2366 #else 2367 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2368 #endif 2369 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2370 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2371 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2372 j = 0; 2373 while (j < temp_constraints && singular_vals[j] < tol) j++; 2374 total_counts = total_counts-j; 2375 valid_constraints = temp_constraints-j; 2376 /* scale and copy POD basis into used quadrature memory */ 2377 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2378 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2379 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2380 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2381 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2382 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2383 if (j<temp_constraints) { 2384 PetscInt ii; 2385 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2386 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2387 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2388 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2389 for (k=0;k<temp_constraints-j;k++) { 2390 for (ii=0;ii<size_of_constraint;ii++) { 2391 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2392 } 2393 } 2394 } 2395 #else /* on missing GESVD */ 2396 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2397 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2398 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2399 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2400 #if !defined(PETSC_USE_COMPLEX) 2401 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2402 #else 2403 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2404 #endif 2405 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2406 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2407 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2408 k = temp_constraints; 2409 if (k > size_of_constraint) k = size_of_constraint; 2410 j = 0; 2411 while (j < k && singular_vals[k-j-1] < tol) j++; 2412 valid_constraints = k-j; 2413 total_counts = total_counts-temp_constraints+valid_constraints; 2414 #endif /* on missing GESVD */ 2415 } 2416 } 2417 /* update pointers information */ 2418 if (valid_constraints) { 2419 constraints_n[total_counts_cc] = valid_constraints; 2420 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2421 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2422 /* set change_of_basis flag */ 2423 if (boolforchange) { 2424 PetscBTSet(change_basis,total_counts_cc); 2425 } 2426 total_counts_cc++; 2427 } 2428 } 2429 /* free workspace */ 2430 if (!skip_lapack) { 2431 ierr = PetscFree(work);CHKERRQ(ierr); 2432 #if defined(PETSC_USE_COMPLEX) 2433 ierr = PetscFree(rwork);CHKERRQ(ierr); 2434 #endif 2435 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2436 #if defined(PETSC_MISSING_LAPACK_GESVD) 2437 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2438 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2439 #endif 2440 } 2441 for (k=0;k<nnsp_size;k++) { 2442 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2443 } 2444 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2445 /* free index sets of faces, edges and vertices */ 2446 for (i=0;i<n_ISForFaces;i++) { 2447 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2448 } 2449 if (n_ISForFaces) { 2450 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2451 } 2452 for (i=0;i<n_ISForEdges;i++) { 2453 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2454 } 2455 if (n_ISForEdges) { 2456 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2457 } 2458 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2459 } else { 2460 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2461 2462 total_counts = 0; 2463 n_vertices = 0; 2464 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2465 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2466 } 2467 max_constraints = 0; 2468 total_counts_cc = 0; 2469 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2470 total_counts += pcbddc->adaptive_constraints_n[i]; 2471 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2472 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2473 } 2474 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2475 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2476 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2477 constraints_data = pcbddc->adaptive_constraints_data; 2478 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2479 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2480 total_counts_cc = 0; 2481 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2482 if (pcbddc->adaptive_constraints_n[i]) { 2483 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2484 } 2485 } 2486 #if 0 2487 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2488 for (i=0;i<total_counts_cc;i++) { 2489 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2490 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2491 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2492 printf(" %d",constraints_idxs[j]); 2493 } 2494 printf("\n"); 2495 printf("number of cc: %d\n",constraints_n[i]); 2496 } 2497 for (i=0;i<n_vertices;i++) { 2498 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2499 } 2500 for (i=0;i<sub_schurs->n_subs;i++) { 2501 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2502 } 2503 #endif 2504 2505 max_size_of_constraint = 0; 2506 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2507 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2508 /* Change of basis */ 2509 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2510 if (pcbddc->use_change_of_basis) { 2511 for (i=0;i<sub_schurs->n_subs;i++) { 2512 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2513 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2514 } 2515 } 2516 } 2517 } 2518 pcbddc->local_primal_size = total_counts; 2519 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2520 2521 /* map constraints_idxs in boundary numbering */ 2522 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2523 if (i != constraints_idxs_ptr[total_counts_cc]) { 2524 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2525 } 2526 2527 /* Create constraint matrix */ 2528 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2529 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2530 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2531 2532 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2533 /* determine if a QR strategy is needed for change of basis */ 2534 qr_needed = PETSC_FALSE; 2535 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2536 total_primal_vertices=0; 2537 pcbddc->local_primal_size_cc = 0; 2538 for (i=0;i<total_counts_cc;i++) { 2539 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2540 if (size_of_constraint == 1) { 2541 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2542 pcbddc->local_primal_size_cc += 1; 2543 } else if (PetscBTLookup(change_basis,i)) { 2544 for (k=0;k<constraints_n[i];k++) { 2545 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2546 } 2547 pcbddc->local_primal_size_cc += constraints_n[i]; 2548 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2549 PetscBTSet(qr_needed_idx,i); 2550 qr_needed = PETSC_TRUE; 2551 } 2552 } else { 2553 pcbddc->local_primal_size_cc += 1; 2554 } 2555 } 2556 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2557 pcbddc->n_vertices = total_primal_vertices; 2558 /* permute indices in order to have a sorted set of vertices */ 2559 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2560 2561 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2562 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2563 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2564 2565 /* nonzero structure of constraint matrix */ 2566 /* and get reference dof for local constraints */ 2567 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2568 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2569 2570 j = total_primal_vertices; 2571 total_counts = total_primal_vertices; 2572 cum = total_primal_vertices; 2573 for (i=n_vertices;i<total_counts_cc;i++) { 2574 if (!PetscBTLookup(change_basis,i)) { 2575 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2576 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2577 cum++; 2578 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2579 for (k=0;k<constraints_n[i];k++) { 2580 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2581 nnz[j+k] = size_of_constraint; 2582 } 2583 j += constraints_n[i]; 2584 } 2585 } 2586 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2587 ierr = PetscFree(nnz);CHKERRQ(ierr); 2588 2589 /* set values in constraint matrix */ 2590 for (i=0;i<total_primal_vertices;i++) { 2591 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2592 } 2593 total_counts = total_primal_vertices; 2594 for (i=n_vertices;i<total_counts_cc;i++) { 2595 if (!PetscBTLookup(change_basis,i)) { 2596 PetscInt *cols; 2597 2598 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2599 cols = constraints_idxs+constraints_idxs_ptr[i]; 2600 for (k=0;k<constraints_n[i];k++) { 2601 PetscInt row = total_counts+k; 2602 PetscScalar *vals; 2603 2604 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2605 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2606 } 2607 total_counts += constraints_n[i]; 2608 } 2609 } 2610 /* assembling */ 2611 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2612 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2613 2614 /* 2615 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2616 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2617 */ 2618 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2619 if (pcbddc->use_change_of_basis) { 2620 /* dual and primal dofs on a single cc */ 2621 PetscInt dual_dofs,primal_dofs; 2622 /* working stuff for GEQRF */ 2623 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2624 PetscBLASInt lqr_work; 2625 /* working stuff for UNGQR */ 2626 PetscScalar *gqr_work,lgqr_work_t; 2627 PetscBLASInt lgqr_work; 2628 /* working stuff for TRTRS */ 2629 PetscScalar *trs_rhs; 2630 PetscBLASInt Blas_NRHS; 2631 /* pointers for values insertion into change of basis matrix */ 2632 PetscInt *start_rows,*start_cols; 2633 PetscScalar *start_vals; 2634 /* working stuff for values insertion */ 2635 PetscBT is_primal; 2636 PetscInt *aux_primal_numbering_B; 2637 /* matrix sizes */ 2638 PetscInt global_size,local_size; 2639 /* temporary change of basis */ 2640 Mat localChangeOfBasisMatrix; 2641 /* extra space for debugging */ 2642 PetscScalar *dbg_work; 2643 2644 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2645 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2646 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2647 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2648 /* nonzeros for local mat */ 2649 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2650 for (i=0;i<pcis->n;i++) nnz[i]=1; 2651 for (i=n_vertices;i<total_counts_cc;i++) { 2652 if (PetscBTLookup(change_basis,i)) { 2653 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2654 if (PetscBTLookup(qr_needed_idx,i)) { 2655 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2656 } else { 2657 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2658 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2659 } 2660 } 2661 } 2662 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2663 ierr = PetscFree(nnz);CHKERRQ(ierr); 2664 /* Set initial identity in the matrix */ 2665 for (i=0;i<pcis->n;i++) { 2666 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2667 } 2668 2669 if (pcbddc->dbg_flag) { 2670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2672 } 2673 2674 2675 /* Now we loop on the constraints which need a change of basis */ 2676 /* 2677 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2678 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2679 2680 Basic blocks of change of basis matrix T computed by 2681 2682 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2683 2684 | 1 0 ... 0 s_1/S | 2685 | 0 1 ... 0 s_2/S | 2686 | ... | 2687 | 0 ... 1 s_{n-1}/S | 2688 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2689 2690 with S = \sum_{i=1}^n s_i^2 2691 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2692 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2693 2694 - QR decomposition of constraints otherwise 2695 */ 2696 if (qr_needed) { 2697 /* space to store Q */ 2698 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2699 /* first we issue queries for optimal work */ 2700 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2701 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2702 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2703 lqr_work = -1; 2704 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2705 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2706 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2707 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2708 lgqr_work = -1; 2709 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2711 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2712 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2713 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2714 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2715 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2716 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2717 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2718 /* array to store scaling factors for reflectors */ 2719 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2720 /* array to store rhs and solution of triangular solver */ 2721 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2722 /* allocating workspace for check */ 2723 if (pcbddc->dbg_flag) { 2724 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2725 } 2726 } 2727 /* array to store whether a node is primal or not */ 2728 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2729 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2730 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2731 if (i != total_primal_vertices) { 2732 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2733 } 2734 for (i=0;i<total_primal_vertices;i++) { 2735 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2736 } 2737 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2738 2739 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2740 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2741 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2742 if (PetscBTLookup(change_basis,total_counts)) { 2743 /* get constraint info */ 2744 primal_dofs = constraints_n[total_counts]; 2745 dual_dofs = size_of_constraint-primal_dofs; 2746 2747 if (pcbddc->dbg_flag) { 2748 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2749 } 2750 2751 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2752 2753 /* copy quadrature constraints for change of basis check */ 2754 if (pcbddc->dbg_flag) { 2755 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2756 } 2757 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2758 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2759 2760 /* compute QR decomposition of constraints */ 2761 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2762 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2763 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2764 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2765 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2766 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2767 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2768 2769 /* explictly compute R^-T */ 2770 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2771 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2772 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2773 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2774 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2775 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2776 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2777 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2778 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2779 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2780 2781 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2782 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2783 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2784 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2785 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2786 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2787 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2788 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2789 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2790 2791 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2792 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2793 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2794 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2795 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2796 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2797 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2798 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2799 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2800 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2801 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2802 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2803 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2804 2805 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2806 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2807 /* insert cols for primal dofs */ 2808 for (j=0;j<primal_dofs;j++) { 2809 start_vals = &qr_basis[j*size_of_constraint]; 2810 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2811 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2812 } 2813 /* insert cols for dual dofs */ 2814 for (j=0,k=0;j<dual_dofs;k++) { 2815 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2816 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2817 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2818 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2819 j++; 2820 } 2821 } 2822 2823 /* check change of basis */ 2824 if (pcbddc->dbg_flag) { 2825 PetscInt ii,jj; 2826 PetscBool valid_qr=PETSC_TRUE; 2827 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2828 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2829 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2830 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2831 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2832 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2833 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2834 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2835 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2836 for (jj=0;jj<size_of_constraint;jj++) { 2837 for (ii=0;ii<primal_dofs;ii++) { 2838 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2839 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2840 } 2841 } 2842 if (!valid_qr) { 2843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2844 for (jj=0;jj<size_of_constraint;jj++) { 2845 for (ii=0;ii<primal_dofs;ii++) { 2846 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2847 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2848 } 2849 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2850 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2851 } 2852 } 2853 } 2854 } else { 2855 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2856 } 2857 } 2858 } else { /* simple transformation block */ 2859 PetscInt row,col; 2860 PetscScalar val,norm; 2861 2862 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2863 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2864 for (j=0;j<size_of_constraint;j++) { 2865 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2866 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2867 if (!PetscBTLookup(is_primal,row_B)) { 2868 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2869 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2870 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2871 } else { 2872 for (k=0;k<size_of_constraint;k++) { 2873 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2874 if (row != col) { 2875 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2876 } else { 2877 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2878 } 2879 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2880 } 2881 } 2882 } 2883 if (pcbddc->dbg_flag) { 2884 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2885 } 2886 } 2887 } else { 2888 if (pcbddc->dbg_flag) { 2889 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2890 } 2891 } 2892 } 2893 2894 /* free workspace */ 2895 if (qr_needed) { 2896 if (pcbddc->dbg_flag) { 2897 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2898 } 2899 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2900 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2901 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2902 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2903 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2904 } 2905 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2906 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2907 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2908 2909 /* assembling of global change of variable */ 2910 { 2911 Mat tmat; 2912 PetscInt bs; 2913 2914 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2915 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2916 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2917 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2918 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2919 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2920 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2921 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2922 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2923 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2924 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2925 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2926 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2927 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2928 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2929 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2930 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2931 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2932 } 2933 /* check */ 2934 if (pcbddc->dbg_flag) { 2935 PetscReal error; 2936 Vec x,x_change; 2937 2938 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2939 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2940 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2941 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2942 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2943 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2944 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2945 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2946 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2947 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2948 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2949 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2951 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2952 ierr = VecDestroy(&x);CHKERRQ(ierr); 2953 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2954 } 2955 2956 /* adapt sub_schurs computed (if any) */ 2957 if (pcbddc->use_deluxe_scaling) { 2958 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2959 if (sub_schurs->S_Ej_all) { 2960 Mat S_new,tmat; 2961 IS is_all_N; 2962 2963 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2964 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2965 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2966 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2967 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2968 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2969 sub_schurs->S_Ej_all = S_new; 2970 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2971 if (sub_schurs->sum_S_Ej_all) { 2972 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2973 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2974 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2975 sub_schurs->sum_S_Ej_all = S_new; 2976 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2977 } 2978 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2979 } 2980 } 2981 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2982 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2983 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2984 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2985 } 2986 2987 /* set up change of basis context */ 2988 if (pcbddc->ChangeOfBasisMatrix) { 2989 PCBDDCChange_ctx change_ctx; 2990 2991 if (!pcbddc->new_global_mat) { 2992 PetscInt global_size,local_size; 2993 2994 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2995 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2996 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2997 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2998 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2999 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3000 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3001 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3002 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3003 } else { 3004 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3005 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3006 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3007 } 3008 if (!pcbddc->user_ChangeOfBasisMatrix) { 3009 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3010 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3011 } else { 3012 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3013 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3014 } 3015 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3016 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3017 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3018 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3019 } 3020 3021 /* check if a new primal space has been introduced */ 3022 pcbddc->new_primal_space_local = PETSC_TRUE; 3023 if (olocal_primal_size == pcbddc->local_primal_size) { 3024 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3025 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3026 if (!pcbddc->new_primal_space_local) { 3027 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3028 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3029 } 3030 } 3031 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3032 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3033 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3034 3035 /* flush dbg viewer */ 3036 if (pcbddc->dbg_flag) { 3037 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3038 } 3039 3040 /* free workspace */ 3041 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3042 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3043 if (!pcbddc->adaptive_selection) { 3044 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3045 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3046 } else { 3047 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3048 pcbddc->adaptive_constraints_idxs_ptr, 3049 pcbddc->adaptive_constraints_data_ptr, 3050 pcbddc->adaptive_constraints_idxs, 3051 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3052 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3053 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3054 } 3055 PetscFunctionReturn(0); 3056 } 3057 3058 #undef __FUNCT__ 3059 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3060 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3061 { 3062 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3063 PC_IS *pcis = (PC_IS*)pc->data; 3064 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3065 PetscInt ierr,i,vertex_size,N; 3066 PetscViewer viewer=pcbddc->dbg_viewer; 3067 3068 PetscFunctionBegin; 3069 /* Reset previously computed graph */ 3070 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3071 /* Init local Graph struct */ 3072 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3073 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3074 3075 /* Check validity of the csr graph passed in by the user */ 3076 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3077 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3078 } 3079 3080 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3081 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3082 PetscInt *xadj,*adjncy; 3083 PetscInt nvtxs; 3084 PetscBool flg_row=PETSC_FALSE; 3085 3086 if (pcbddc->use_local_adj) { 3087 3088 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3089 if (flg_row) { 3090 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3091 pcbddc->computed_rowadj = PETSC_TRUE; 3092 } 3093 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3094 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3095 IS is_dummy; 3096 ISLocalToGlobalMapping l2gmap_dummy; 3097 PetscInt j,sum; 3098 PetscInt *cxadj,*cadjncy; 3099 const PetscInt *idxs; 3100 PCBDDCGraph graph; 3101 PetscBT is_on_boundary; 3102 3103 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3104 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3105 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3106 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3107 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3108 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3109 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3110 if (flg_row) { 3111 graph->xadj = xadj; 3112 graph->adjncy = adjncy; 3113 } 3114 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3115 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3116 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3117 3118 if (pcbddc->dbg_flag) { 3119 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3120 for (i=0;i<graph->ncc;i++) { 3121 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3122 } 3123 } 3124 3125 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3126 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3127 for (i=0;i<pcis->n_B;i++) { 3128 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3129 } 3130 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3131 3132 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3133 sum = 0; 3134 for (i=0;i<graph->ncc;i++) { 3135 PetscInt sizecc = 0; 3136 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3137 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3138 sizecc++; 3139 } 3140 } 3141 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3142 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3143 cxadj[graph->queue[j]] = sizecc; 3144 } 3145 } 3146 sum += sizecc*sizecc; 3147 } 3148 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3149 sum = 0; 3150 for (i=0;i<pcis->n;i++) { 3151 PetscInt temp = cxadj[i]; 3152 cxadj[i] = sum; 3153 sum += temp; 3154 } 3155 cxadj[pcis->n] = sum; 3156 for (i=0;i<graph->ncc;i++) { 3157 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3158 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3159 PetscInt k,sizecc = 0; 3160 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3161 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3162 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3163 sizecc++; 3164 } 3165 } 3166 } 3167 } 3168 } 3169 if (sum) { 3170 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3171 } else { 3172 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3173 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3174 } 3175 graph->xadj = 0; 3176 graph->adjncy = 0; 3177 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3178 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3179 } 3180 } 3181 if (pcbddc->dbg_flag) { 3182 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3183 } 3184 3185 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3186 vertex_size = 1; 3187 if (pcbddc->user_provided_isfordofs) { 3188 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3189 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3190 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3191 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3192 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3193 } 3194 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3195 pcbddc->n_ISForDofs = 0; 3196 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3197 } 3198 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3199 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3200 } else { 3201 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3202 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3203 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3204 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3205 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3206 } 3207 } 3208 } 3209 3210 /* Setup of Graph */ 3211 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3212 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3213 } 3214 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3215 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3216 } 3217 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3218 3219 /* Graph's connected components analysis */ 3220 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3221 3222 /* print some info to stdout */ 3223 if (pcbddc->dbg_flag) { 3224 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3225 } 3226 3227 /* mark topography has done */ 3228 pcbddc->recompute_topography = PETSC_FALSE; 3229 PetscFunctionReturn(0); 3230 } 3231 3232 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3233 #undef __FUNCT__ 3234 #define __FUNCT__ "PCBDDCSubsetNumbering" 3235 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3236 { 3237 PetscSF sf; 3238 PetscLayout map; 3239 const PetscInt *idxs; 3240 PetscInt *leaf_data,*root_data,*gidxs; 3241 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3242 PetscInt n_n,nlocals,start,first_index; 3243 PetscMPIInt commsize; 3244 PetscBool first_found; 3245 PetscErrorCode ierr; 3246 3247 PetscFunctionBegin; 3248 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3249 if (subset_mult) { 3250 PetscCheckSameComm(subset,1,subset_mult,2); 3251 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3252 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3253 } 3254 /* create workspace layout for computing global indices of subset */ 3255 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3256 lbounds[0] = lbounds[1] = 0; 3257 for (i=0;i<n;i++) { 3258 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3259 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3260 } 3261 lbounds[0] = -lbounds[0]; 3262 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3263 gbounds[0] = -gbounds[0]; 3264 N = gbounds[1] - gbounds[0] + 1; 3265 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3266 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3267 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3268 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3269 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3270 3271 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3272 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3273 if (subset_mult) { 3274 const PetscInt* idxs_mult; 3275 3276 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3277 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3278 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3279 } else { 3280 for (i=0;i<n;i++) leaf_data[i] = 1; 3281 } 3282 /* local size of new subset */ 3283 n_n = 0; 3284 for (i=0;i<n;i++) n_n += leaf_data[i]; 3285 3286 /* global indexes in layout */ 3287 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3288 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3289 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3290 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3291 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3292 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3293 3294 /* reduce from leaves to roots */ 3295 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3296 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3297 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3298 3299 /* count indexes in local part of layout */ 3300 nlocals = 0; 3301 first_index = -1; 3302 first_found = PETSC_FALSE; 3303 for (i=0;i<Nl;i++) { 3304 if (!first_found && root_data[i]) { 3305 first_found = PETSC_TRUE; 3306 first_index = i; 3307 } 3308 nlocals += root_data[i]; 3309 } 3310 3311 /* cumulative of number of indexes and size of subset without holes */ 3312 #if defined(PETSC_HAVE_MPI_EXSCAN) 3313 start = 0; 3314 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3315 #else 3316 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3317 start = start-nlocals; 3318 #endif 3319 3320 if (N_n) { /* compute total size of new subset if requested */ 3321 *N_n = start + nlocals; 3322 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3323 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3324 } 3325 3326 /* adapt root data with cumulative */ 3327 if (first_found) { 3328 PetscInt old_index; 3329 3330 root_data[first_index] += start; 3331 old_index = first_index; 3332 for (i=first_index+1;i<Nl;i++) { 3333 if (root_data[i]) { 3334 root_data[i] += root_data[old_index]; 3335 old_index = i; 3336 } 3337 } 3338 } 3339 3340 /* from roots to leaves */ 3341 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3342 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3343 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3344 3345 /* create new IS with global indexes without holes */ 3346 if (subset_mult) { 3347 const PetscInt* idxs_mult; 3348 PetscInt cum; 3349 3350 cum = 0; 3351 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3352 for (i=0;i<n;i++) { 3353 PetscInt j; 3354 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3355 } 3356 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3357 } else { 3358 for (i=0;i<n;i++) { 3359 gidxs[i] = leaf_data[i]-1; 3360 } 3361 } 3362 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3363 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3364 PetscFunctionReturn(0); 3365 } 3366 3367 #undef __FUNCT__ 3368 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3369 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3370 { 3371 PetscInt i,j; 3372 PetscScalar *alphas; 3373 PetscErrorCode ierr; 3374 3375 PetscFunctionBegin; 3376 /* this implements stabilized Gram-Schmidt */ 3377 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3378 for (i=0;i<n;i++) { 3379 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3380 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3381 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3382 } 3383 ierr = PetscFree(alphas);CHKERRQ(ierr); 3384 PetscFunctionReturn(0); 3385 } 3386 3387 #undef __FUNCT__ 3388 #define __FUNCT__ "MatISGetSubassemblingPattern" 3389 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3390 { 3391 IS ranks_send_to; 3392 PetscInt n_neighs,*neighs,*n_shared,**shared; 3393 PetscMPIInt size,rank,color; 3394 PetscInt *xadj,*adjncy; 3395 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3396 PetscInt i,local_size,threshold=0; 3397 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3398 PetscSubcomm subcomm; 3399 PetscErrorCode ierr; 3400 3401 PetscFunctionBegin; 3402 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3403 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3404 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3405 3406 /* Get info on mapping */ 3407 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3408 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3409 3410 /* build local CSR graph of subdomains' connectivity */ 3411 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3412 xadj[0] = 0; 3413 xadj[1] = PetscMax(n_neighs-1,0); 3414 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3415 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3416 3417 if (threshold) { 3418 PetscInt xadj_count = 0; 3419 for (i=1;i<n_neighs;i++) { 3420 if (n_shared[i] > threshold) { 3421 adjncy[xadj_count] = neighs[i]; 3422 adjncy_wgt[xadj_count] = n_shared[i]; 3423 xadj_count++; 3424 } 3425 } 3426 xadj[1] = xadj_count; 3427 } else { 3428 if (xadj[1]) { 3429 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3430 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3431 } 3432 } 3433 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3434 if (use_square) { 3435 for (i=0;i<xadj[1];i++) { 3436 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3437 } 3438 } 3439 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3440 3441 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3442 3443 /* 3444 Restrict work on active processes only. 3445 */ 3446 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3447 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3448 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3449 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3450 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3451 if (color) { 3452 ierr = PetscFree(xadj);CHKERRQ(ierr); 3453 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3454 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3455 } else { 3456 Mat subdomain_adj; 3457 IS new_ranks,new_ranks_contig; 3458 MatPartitioning partitioner; 3459 PetscInt prank,rstart=0,rend=0; 3460 PetscInt *is_indices,*oldranks; 3461 PetscBool aggregate; 3462 3463 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3464 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3465 prank = rank; 3466 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3467 /* 3468 for (i=0;i<size;i++) { 3469 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3470 } 3471 */ 3472 for (i=0;i<xadj[1];i++) { 3473 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3474 } 3475 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3476 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3477 if (aggregate) { 3478 PetscInt lrows,row,ncols,*cols; 3479 PetscMPIInt nrank; 3480 PetscScalar *vals; 3481 3482 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3483 lrows = 0; 3484 if (nrank<redprocs) { 3485 lrows = size/redprocs; 3486 if (nrank<size%redprocs) lrows++; 3487 } 3488 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3489 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3490 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3491 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3492 row = nrank; 3493 ncols = xadj[1]-xadj[0]; 3494 cols = adjncy; 3495 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3496 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3497 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3498 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3499 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3500 ierr = PetscFree(xadj);CHKERRQ(ierr); 3501 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3502 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3503 ierr = PetscFree(vals);CHKERRQ(ierr); 3504 } else { 3505 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3506 } 3507 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3508 3509 /* Partition */ 3510 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3511 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3512 if (use_vwgt) { 3513 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3514 v_wgt[0] = local_size; 3515 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3516 } 3517 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3518 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3519 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3520 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3521 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3522 3523 /* renumber new_ranks to avoid "holes" in new set of processors */ 3524 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3525 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3526 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3527 if (!redprocs) { 3528 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3529 } else { 3530 PetscInt idxs[1]; 3531 PetscMPIInt tag; 3532 MPI_Request *reqs; 3533 3534 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3535 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3536 for (i=rstart;i<rend;i++) { 3537 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3538 } 3539 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3540 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3541 ierr = PetscFree(reqs);CHKERRQ(ierr); 3542 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3543 } 3544 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3545 /* clean up */ 3546 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3547 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3548 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3549 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3550 } 3551 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3552 3553 /* assemble parallel IS for sends */ 3554 i = 1; 3555 if (color) i=0; 3556 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3557 /* get back IS */ 3558 *is_sends = ranks_send_to; 3559 PetscFunctionReturn(0); 3560 } 3561 3562 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3563 3564 #undef __FUNCT__ 3565 #define __FUNCT__ "MatISSubassemble" 3566 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3567 { 3568 Mat local_mat; 3569 IS is_sends_internal; 3570 PetscInt rows,cols,new_local_rows; 3571 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3572 PetscBool ismatis,isdense,newisdense,destroy_mat; 3573 ISLocalToGlobalMapping l2gmap; 3574 PetscInt* l2gmap_indices; 3575 const PetscInt* is_indices; 3576 MatType new_local_type; 3577 /* buffers */ 3578 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3579 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3580 PetscInt *recv_buffer_idxs_local; 3581 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3582 /* MPI */ 3583 MPI_Comm comm,comm_n; 3584 PetscSubcomm subcomm; 3585 PetscMPIInt n_sends,n_recvs,commsize; 3586 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3587 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3588 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3589 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3590 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3591 PetscErrorCode ierr; 3592 3593 PetscFunctionBegin; 3594 /* TODO: add missing checks */ 3595 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3596 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3597 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3598 PetscValidLogicalCollectiveInt(mat,nis,7); 3599 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3600 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3601 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3602 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3603 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3604 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3605 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3606 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3607 PetscInt mrows,mcols,mnrows,mncols; 3608 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3609 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3610 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3611 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3612 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3613 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3614 } 3615 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3616 PetscValidLogicalCollectiveInt(mat,bs,0); 3617 /* prepare IS for sending if not provided */ 3618 if (!is_sends) { 3619 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3620 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3621 } else { 3622 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3623 is_sends_internal = is_sends; 3624 } 3625 3626 /* get comm */ 3627 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3628 3629 /* compute number of sends */ 3630 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3631 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3632 3633 /* compute number of receives */ 3634 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3635 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3636 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3637 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3638 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3639 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3640 ierr = PetscFree(iflags);CHKERRQ(ierr); 3641 3642 /* restrict comm if requested */ 3643 subcomm = 0; 3644 destroy_mat = PETSC_FALSE; 3645 if (restrict_comm) { 3646 PetscMPIInt color,subcommsize; 3647 3648 color = 0; 3649 if (restrict_full) { 3650 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3651 } else { 3652 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3653 } 3654 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3655 subcommsize = commsize - subcommsize; 3656 /* check if reuse has been requested */ 3657 if (reuse == MAT_REUSE_MATRIX) { 3658 if (*mat_n) { 3659 PetscMPIInt subcommsize2; 3660 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3661 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3662 comm_n = PetscObjectComm((PetscObject)*mat_n); 3663 } else { 3664 comm_n = PETSC_COMM_SELF; 3665 } 3666 } else { /* MAT_INITIAL_MATRIX */ 3667 PetscMPIInt rank; 3668 3669 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3670 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3671 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3672 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3673 comm_n = PetscSubcommChild(subcomm); 3674 } 3675 /* flag to destroy *mat_n if not significative */ 3676 if (color) destroy_mat = PETSC_TRUE; 3677 } else { 3678 comm_n = comm; 3679 } 3680 3681 /* prepare send/receive buffers */ 3682 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3683 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3684 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3685 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3686 if (nis) { 3687 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3688 } 3689 3690 /* Get data from local matrices */ 3691 if (!isdense) { 3692 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3693 /* TODO: See below some guidelines on how to prepare the local buffers */ 3694 /* 3695 send_buffer_vals should contain the raw values of the local matrix 3696 send_buffer_idxs should contain: 3697 - MatType_PRIVATE type 3698 - PetscInt size_of_l2gmap 3699 - PetscInt global_row_indices[size_of_l2gmap] 3700 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3701 */ 3702 } else { 3703 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3704 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3705 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3706 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3707 send_buffer_idxs[1] = i; 3708 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3709 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3710 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3711 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3712 for (i=0;i<n_sends;i++) { 3713 ilengths_vals[is_indices[i]] = len*len; 3714 ilengths_idxs[is_indices[i]] = len+2; 3715 } 3716 } 3717 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3718 /* additional is (if any) */ 3719 if (nis) { 3720 PetscMPIInt psum; 3721 PetscInt j; 3722 for (j=0,psum=0;j<nis;j++) { 3723 PetscInt plen; 3724 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3725 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3726 psum += len+1; /* indices + lenght */ 3727 } 3728 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3729 for (j=0,psum=0;j<nis;j++) { 3730 PetscInt plen; 3731 const PetscInt *is_array_idxs; 3732 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3733 send_buffer_idxs_is[psum] = plen; 3734 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3735 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3736 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3737 psum += plen+1; /* indices + lenght */ 3738 } 3739 for (i=0;i<n_sends;i++) { 3740 ilengths_idxs_is[is_indices[i]] = psum; 3741 } 3742 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3743 } 3744 3745 buf_size_idxs = 0; 3746 buf_size_vals = 0; 3747 buf_size_idxs_is = 0; 3748 for (i=0;i<n_recvs;i++) { 3749 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3750 buf_size_vals += (PetscInt)olengths_vals[i]; 3751 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3752 } 3753 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3754 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3755 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3756 3757 /* get new tags for clean communications */ 3758 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3759 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3760 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3761 3762 /* allocate for requests */ 3763 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3764 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3765 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3766 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3767 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3768 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3769 3770 /* communications */ 3771 ptr_idxs = recv_buffer_idxs; 3772 ptr_vals = recv_buffer_vals; 3773 ptr_idxs_is = recv_buffer_idxs_is; 3774 for (i=0;i<n_recvs;i++) { 3775 source_dest = onodes[i]; 3776 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3777 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3778 ptr_idxs += olengths_idxs[i]; 3779 ptr_vals += olengths_vals[i]; 3780 if (nis) { 3781 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3782 ptr_idxs_is += olengths_idxs_is[i]; 3783 } 3784 } 3785 for (i=0;i<n_sends;i++) { 3786 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3787 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3788 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3789 if (nis) { 3790 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3791 } 3792 } 3793 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3794 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3795 3796 /* assemble new l2g map */ 3797 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3798 ptr_idxs = recv_buffer_idxs; 3799 new_local_rows = 0; 3800 for (i=0;i<n_recvs;i++) { 3801 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3802 ptr_idxs += olengths_idxs[i]; 3803 } 3804 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3805 ptr_idxs = recv_buffer_idxs; 3806 new_local_rows = 0; 3807 for (i=0;i<n_recvs;i++) { 3808 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3809 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3810 ptr_idxs += olengths_idxs[i]; 3811 } 3812 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3813 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3814 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3815 3816 /* infer new local matrix type from received local matrices type */ 3817 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3818 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3819 if (n_recvs) { 3820 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3821 ptr_idxs = recv_buffer_idxs; 3822 for (i=0;i<n_recvs;i++) { 3823 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3824 new_local_type_private = MATAIJ_PRIVATE; 3825 break; 3826 } 3827 ptr_idxs += olengths_idxs[i]; 3828 } 3829 switch (new_local_type_private) { 3830 case MATDENSE_PRIVATE: 3831 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3832 new_local_type = MATSEQAIJ; 3833 bs = 1; 3834 } else { /* if I receive only 1 dense matrix */ 3835 new_local_type = MATSEQDENSE; 3836 bs = 1; 3837 } 3838 break; 3839 case MATAIJ_PRIVATE: 3840 new_local_type = MATSEQAIJ; 3841 bs = 1; 3842 break; 3843 case MATBAIJ_PRIVATE: 3844 new_local_type = MATSEQBAIJ; 3845 break; 3846 case MATSBAIJ_PRIVATE: 3847 new_local_type = MATSEQSBAIJ; 3848 break; 3849 default: 3850 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3851 break; 3852 } 3853 } else { /* by default, new_local_type is seqdense */ 3854 new_local_type = MATSEQDENSE; 3855 bs = 1; 3856 } 3857 3858 /* create MATIS object if needed */ 3859 if (reuse == MAT_INITIAL_MATRIX) { 3860 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3861 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3862 } else { 3863 /* it also destroys the local matrices */ 3864 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3865 } 3866 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3867 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3868 3869 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3870 3871 /* Global to local map of received indices */ 3872 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3873 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3874 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3875 3876 /* restore attributes -> type of incoming data and its size */ 3877 buf_size_idxs = 0; 3878 for (i=0;i<n_recvs;i++) { 3879 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3880 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3881 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3882 } 3883 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3884 3885 /* set preallocation */ 3886 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3887 if (!newisdense) { 3888 PetscInt *new_local_nnz=0; 3889 3890 ptr_vals = recv_buffer_vals; 3891 ptr_idxs = recv_buffer_idxs_local; 3892 if (n_recvs) { 3893 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3894 } 3895 for (i=0;i<n_recvs;i++) { 3896 PetscInt j; 3897 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3898 for (j=0;j<*(ptr_idxs+1);j++) { 3899 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3900 } 3901 } else { 3902 /* TODO */ 3903 } 3904 ptr_idxs += olengths_idxs[i]; 3905 } 3906 if (new_local_nnz) { 3907 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3908 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3909 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3910 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3911 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3912 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3913 } else { 3914 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3915 } 3916 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3917 } else { 3918 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3919 } 3920 3921 /* set values */ 3922 ptr_vals = recv_buffer_vals; 3923 ptr_idxs = recv_buffer_idxs_local; 3924 for (i=0;i<n_recvs;i++) { 3925 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3926 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3927 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3928 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3929 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3930 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3931 } else { 3932 /* TODO */ 3933 } 3934 ptr_idxs += olengths_idxs[i]; 3935 ptr_vals += olengths_vals[i]; 3936 } 3937 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3938 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3939 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3940 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3941 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3942 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3943 3944 #if 0 3945 if (!restrict_comm) { /* check */ 3946 Vec lvec,rvec; 3947 PetscReal infty_error; 3948 3949 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3950 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3951 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3952 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3953 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3954 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3955 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3956 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3957 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3958 } 3959 #endif 3960 3961 /* assemble new additional is (if any) */ 3962 if (nis) { 3963 PetscInt **temp_idxs,*count_is,j,psum; 3964 3965 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3966 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3967 ptr_idxs = recv_buffer_idxs_is; 3968 psum = 0; 3969 for (i=0;i<n_recvs;i++) { 3970 for (j=0;j<nis;j++) { 3971 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3972 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3973 psum += plen; 3974 ptr_idxs += plen+1; /* shift pointer to received data */ 3975 } 3976 } 3977 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3978 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3979 for (i=1;i<nis;i++) { 3980 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3981 } 3982 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3983 ptr_idxs = recv_buffer_idxs_is; 3984 for (i=0;i<n_recvs;i++) { 3985 for (j=0;j<nis;j++) { 3986 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3987 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3988 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3989 ptr_idxs += plen+1; /* shift pointer to received data */ 3990 } 3991 } 3992 for (i=0;i<nis;i++) { 3993 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3994 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3995 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3996 } 3997 ierr = PetscFree(count_is);CHKERRQ(ierr); 3998 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3999 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4000 } 4001 /* free workspace */ 4002 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4003 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4004 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4005 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4006 if (isdense) { 4007 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4008 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4009 } else { 4010 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4011 } 4012 if (nis) { 4013 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4014 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4015 } 4016 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4017 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4018 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4019 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4020 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4021 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4022 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4023 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4024 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4025 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4026 ierr = PetscFree(onodes);CHKERRQ(ierr); 4027 if (nis) { 4028 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4029 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4030 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4031 } 4032 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4033 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4034 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4035 for (i=0;i<nis;i++) { 4036 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4037 } 4038 *mat_n = NULL; 4039 } 4040 PetscFunctionReturn(0); 4041 } 4042 4043 /* temporary hack into ksp private data structure */ 4044 #include <petsc/private/kspimpl.h> 4045 4046 #undef __FUNCT__ 4047 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4048 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4049 { 4050 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4051 PC_IS *pcis = (PC_IS*)pc->data; 4052 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4053 MatNullSpace CoarseNullSpace=NULL; 4054 ISLocalToGlobalMapping coarse_islg; 4055 IS coarse_is,*isarray; 4056 PetscInt i,im_active=-1,active_procs=-1; 4057 PetscInt nis,nisdofs,nisneu; 4058 PC pc_temp; 4059 PCType coarse_pc_type; 4060 KSPType coarse_ksp_type; 4061 PetscBool multilevel_requested,multilevel_allowed; 4062 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4063 Mat t_coarse_mat_is; 4064 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4065 PetscMPIInt all_procs; 4066 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4067 PetscBool compute_vecs = PETSC_FALSE; 4068 PetscScalar *array; 4069 PetscErrorCode ierr; 4070 4071 PetscFunctionBegin; 4072 /* Assign global numbering to coarse dofs */ 4073 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4074 PetscInt ocoarse_size; 4075 compute_vecs = PETSC_TRUE; 4076 ocoarse_size = pcbddc->coarse_size; 4077 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4078 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4079 /* see if we can avoid some work */ 4080 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4081 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4082 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4083 PC pc; 4084 PetscBool isbddc; 4085 4086 /* temporary workaround since PCBDDC does not have a reset method so far */ 4087 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4088 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4089 if (isbddc) { 4090 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4091 } 4092 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4093 coarse_reuse = PETSC_FALSE; 4094 } else { /* we can safely reuse already computed coarse matrix */ 4095 coarse_reuse = PETSC_TRUE; 4096 } 4097 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4098 coarse_reuse = PETSC_FALSE; 4099 } 4100 /* reset any subassembling information */ 4101 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4102 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4103 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4104 coarse_reuse = PETSC_TRUE; 4105 } 4106 4107 /* count "active" (i.e. with positive local size) and "void" processes */ 4108 im_active = !!(pcis->n); 4109 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4110 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4111 void_procs = all_procs-active_procs; 4112 csin_type_simple = PETSC_TRUE; 4113 redist = PETSC_FALSE; 4114 if (pcbddc->current_level && void_procs) { 4115 csin_ml = PETSC_TRUE; 4116 ncoarse_ml = void_procs; 4117 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4118 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4119 csin_ds = PETSC_TRUE; 4120 ncoarse_ds = pcbddc->redistribute_coarse; 4121 redist = PETSC_TRUE; 4122 } else { 4123 csin_ds = PETSC_TRUE; 4124 ncoarse_ds = active_procs; 4125 redist = PETSC_TRUE; 4126 } 4127 } else { 4128 csin_ml = PETSC_FALSE; 4129 ncoarse_ml = all_procs; 4130 if (void_procs) { 4131 csin_ds = PETSC_TRUE; 4132 ncoarse_ds = void_procs; 4133 csin_type_simple = PETSC_FALSE; 4134 } else { 4135 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4136 csin_ds = PETSC_TRUE; 4137 ncoarse_ds = pcbddc->redistribute_coarse; 4138 redist = PETSC_TRUE; 4139 } else { 4140 csin_ds = PETSC_FALSE; 4141 ncoarse_ds = all_procs; 4142 } 4143 } 4144 } 4145 4146 /* 4147 test if we can go multilevel: three conditions must be satisfied: 4148 - we have not exceeded the number of levels requested 4149 - we can actually subassemble the active processes 4150 - we can find a suitable number of MPI processes where we can place the subassembled problem 4151 */ 4152 multilevel_allowed = PETSC_FALSE; 4153 multilevel_requested = PETSC_FALSE; 4154 if (pcbddc->current_level < pcbddc->max_levels) { 4155 multilevel_requested = PETSC_TRUE; 4156 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4157 multilevel_allowed = PETSC_FALSE; 4158 } else { 4159 multilevel_allowed = PETSC_TRUE; 4160 } 4161 } 4162 /* determine number of process partecipating to coarse solver */ 4163 if (multilevel_allowed) { 4164 ncoarse = ncoarse_ml; 4165 csin = csin_ml; 4166 redist = PETSC_FALSE; 4167 } else { 4168 ncoarse = ncoarse_ds; 4169 csin = csin_ds; 4170 } 4171 4172 /* creates temporary l2gmap and IS for coarse indexes */ 4173 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4174 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4175 4176 /* creates temporary MATIS object for coarse matrix */ 4177 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4178 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4179 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4180 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4181 #if 0 4182 { 4183 PetscViewer viewer; 4184 char filename[256]; 4185 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4186 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4187 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4188 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4189 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4190 } 4191 #endif 4192 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4193 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4194 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4195 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4196 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4197 4198 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4199 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4200 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4201 const PetscInt *idxs; 4202 ISLocalToGlobalMapping tmap; 4203 4204 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4205 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4206 /* allocate space for temporary storage */ 4207 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4208 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4209 /* allocate for IS array */ 4210 nisdofs = pcbddc->n_ISForDofsLocal; 4211 nisneu = !!pcbddc->NeumannBoundariesLocal; 4212 nis = nisdofs + nisneu; 4213 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4214 /* dofs splitting */ 4215 for (i=0;i<nisdofs;i++) { 4216 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4217 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4218 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4219 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4220 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4221 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4223 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4224 } 4225 /* neumann boundaries */ 4226 if (pcbddc->NeumannBoundariesLocal) { 4227 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4228 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4229 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4230 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4231 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4232 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4233 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4234 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4235 } 4236 /* free memory */ 4237 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4238 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4239 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4240 } else { 4241 nis = 0; 4242 nisdofs = 0; 4243 nisneu = 0; 4244 isarray = NULL; 4245 } 4246 /* destroy no longer needed map */ 4247 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4248 4249 /* restrict on coarse candidates (if needed) */ 4250 coarse_mat_is = NULL; 4251 if (csin) { 4252 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4253 if (redist) { 4254 PetscMPIInt rank; 4255 PetscInt spc,n_spc_p1,dest[1],destsize; 4256 4257 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4258 spc = active_procs/ncoarse; 4259 n_spc_p1 = active_procs%ncoarse; 4260 if (im_active) { 4261 destsize = 1; 4262 if (rank > n_spc_p1*(spc+1)-1) { 4263 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4264 } else { 4265 dest[0] = rank/(spc+1); 4266 } 4267 } else { 4268 destsize = 0; 4269 } 4270 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4271 } else if (csin_type_simple) { 4272 PetscMPIInt rank; 4273 PetscInt issize,isidx; 4274 4275 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4276 if (im_active) { 4277 issize = 1; 4278 isidx = (PetscInt)rank; 4279 } else { 4280 issize = 0; 4281 isidx = -1; 4282 } 4283 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4284 } else { /* get a suitable subassembling pattern from MATIS code */ 4285 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4286 } 4287 4288 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4289 if (!redist || ncoarse <= void_procs) { 4290 PetscInt ncoarse_cand,tissize,*nisindices; 4291 PetscInt *coarse_candidates; 4292 const PetscInt* tisindices; 4293 4294 /* get coarse candidates' ranks in pc communicator */ 4295 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4296 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4297 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4298 if (!coarse_candidates[i]) { 4299 coarse_candidates[ncoarse_cand++]=i; 4300 } 4301 } 4302 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4303 4304 4305 if (pcbddc->dbg_flag) { 4306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4307 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4308 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4309 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4310 for (i=0;i<ncoarse_cand;i++) { 4311 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4312 } 4313 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4315 } 4316 /* shift the pattern on coarse candidates */ 4317 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4318 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4319 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4320 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4321 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4322 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4323 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4324 } 4325 if (pcbddc->dbg_flag) { 4326 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4327 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4328 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4329 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4330 } 4331 } 4332 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4333 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4334 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4335 } else { /* this is the last level, so use just receiving processes in subcomm */ 4336 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4337 } 4338 } else { 4339 if (pcbddc->dbg_flag) { 4340 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4341 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4342 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4343 } 4344 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4345 coarse_mat_is = t_coarse_mat_is; 4346 } 4347 4348 /* create local to global scatters for coarse problem */ 4349 if (compute_vecs) { 4350 PetscInt lrows; 4351 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4352 if (coarse_mat_is) { 4353 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4354 } else { 4355 lrows = 0; 4356 } 4357 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4358 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4359 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4360 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4361 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4362 } 4363 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4364 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4365 4366 /* set defaults for coarse KSP and PC */ 4367 if (multilevel_allowed) { 4368 coarse_ksp_type = KSPRICHARDSON; 4369 coarse_pc_type = PCBDDC; 4370 } else { 4371 coarse_ksp_type = KSPPREONLY; 4372 coarse_pc_type = PCREDUNDANT; 4373 } 4374 4375 /* print some info if requested */ 4376 if (pcbddc->dbg_flag) { 4377 if (!multilevel_allowed) { 4378 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4379 if (multilevel_requested) { 4380 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4381 } else if (pcbddc->max_levels) { 4382 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4383 } 4384 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4385 } 4386 } 4387 4388 /* create the coarse KSP object only once with defaults */ 4389 if (coarse_mat_is) { 4390 MatReuse coarse_mat_reuse; 4391 PetscViewer dbg_viewer = NULL; 4392 if (pcbddc->dbg_flag) { 4393 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4394 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4395 } 4396 if (!pcbddc->coarse_ksp) { 4397 char prefix[256],str_level[16]; 4398 size_t len; 4399 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4400 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4401 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4402 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4403 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4404 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4405 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4406 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4407 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4408 /* prefix */ 4409 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4410 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4411 if (!pcbddc->current_level) { 4412 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4413 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4414 } else { 4415 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4416 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4417 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4418 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4419 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4420 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4421 } 4422 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4423 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4424 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4425 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4426 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4427 /* allow user customization */ 4428 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4429 } 4430 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4431 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4432 if (nisdofs) { 4433 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4434 for (i=0;i<nisdofs;i++) { 4435 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4436 } 4437 } 4438 if (nisneu) { 4439 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4440 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4441 } 4442 4443 /* get some info after set from options */ 4444 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4445 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4446 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4447 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4448 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4449 isbddc = PETSC_FALSE; 4450 } 4451 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4452 if (isredundant) { 4453 KSP inner_ksp; 4454 PC inner_pc; 4455 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4456 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4457 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4458 } 4459 4460 /* assemble coarse matrix */ 4461 if (coarse_reuse) { 4462 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4463 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4464 coarse_mat_reuse = MAT_REUSE_MATRIX; 4465 } else { 4466 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4467 } 4468 if (isbddc || isnn) { 4469 if (pcbddc->coarsening_ratio > 1) { 4470 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4471 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4472 if (pcbddc->dbg_flag) { 4473 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4474 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4475 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4476 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4477 } 4478 } 4479 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4480 } else { 4481 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4482 coarse_mat = coarse_mat_is; 4483 } 4484 } else { 4485 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4486 } 4487 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4488 4489 /* propagate symmetry info of coarse matrix */ 4490 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4491 if (pc->pmat->symmetric_set) { 4492 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4493 } 4494 if (pc->pmat->hermitian_set) { 4495 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4496 } 4497 if (pc->pmat->spd_set) { 4498 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4499 } 4500 /* set operators */ 4501 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4502 if (pcbddc->dbg_flag) { 4503 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4504 } 4505 } else { /* processes non partecipating to coarse solver (if any) */ 4506 coarse_mat = 0; 4507 } 4508 ierr = PetscFree(isarray);CHKERRQ(ierr); 4509 #if 0 4510 { 4511 PetscViewer viewer; 4512 char filename[256]; 4513 sprintf(filename,"coarse_mat.m"); 4514 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4515 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4516 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4517 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4518 } 4519 #endif 4520 4521 /* Compute coarse null space (special handling by BDDC only) */ 4522 #if 0 4523 if (pcbddc->NullSpace) { 4524 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4525 } 4526 #endif 4527 4528 if (pcbddc->coarse_ksp) { 4529 Vec crhs,csol; 4530 PetscBool ispreonly; 4531 4532 if (CoarseNullSpace) { 4533 if (isbddc) { 4534 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4535 } else { 4536 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4537 } 4538 } 4539 /* setup coarse ksp */ 4540 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4541 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4542 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4543 /* hack */ 4544 if (!csol) { 4545 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4546 } 4547 if (!crhs) { 4548 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4549 } 4550 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4551 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4552 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4553 KSP check_ksp; 4554 KSPType check_ksp_type; 4555 PC check_pc; 4556 Vec check_vec,coarse_vec; 4557 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4558 PetscInt its; 4559 PetscBool compute_eigs; 4560 PetscReal *eigs_r,*eigs_c; 4561 PetscInt neigs; 4562 const char *prefix; 4563 4564 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4565 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4566 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4567 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4568 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4569 if (ispreonly) { 4570 check_ksp_type = KSPPREONLY; 4571 compute_eigs = PETSC_FALSE; 4572 } else { 4573 check_ksp_type = KSPGMRES; 4574 compute_eigs = PETSC_TRUE; 4575 } 4576 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4577 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4578 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4579 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4580 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4581 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4582 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4583 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4584 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4585 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4586 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4587 /* create random vec */ 4588 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4589 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4590 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4591 if (CoarseNullSpace) { 4592 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4593 } 4594 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4595 /* solve coarse problem */ 4596 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4597 if (CoarseNullSpace) { 4598 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4599 } 4600 /* set eigenvalue estimation if preonly has not been requested */ 4601 if (compute_eigs) { 4602 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4603 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4604 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4605 lambda_max = eigs_r[neigs-1]; 4606 lambda_min = eigs_r[0]; 4607 if (pcbddc->use_coarse_estimates) { 4608 if (lambda_max>lambda_min) { 4609 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4610 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4611 } 4612 } 4613 } 4614 4615 /* check coarse problem residual error */ 4616 if (pcbddc->dbg_flag) { 4617 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4618 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4619 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4620 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4621 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4622 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4623 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4624 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4625 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4626 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4627 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4628 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4629 if (compute_eigs) { 4630 PetscReal lambda_max_s,lambda_min_s; 4631 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4632 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4633 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4634 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4635 for (i=0;i<neigs;i++) { 4636 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4637 } 4638 } 4639 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4640 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4641 } 4642 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4643 if (compute_eigs) { 4644 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4645 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4646 } 4647 } 4648 } 4649 /* print additional info */ 4650 if (pcbddc->dbg_flag) { 4651 /* waits until all processes reaches this point */ 4652 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4653 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4654 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4655 } 4656 4657 /* free memory */ 4658 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4659 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4660 PetscFunctionReturn(0); 4661 } 4662 4663 #undef __FUNCT__ 4664 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4665 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4666 { 4667 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4668 PC_IS* pcis = (PC_IS*)pc->data; 4669 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4670 IS subset,subset_mult,subset_n; 4671 PetscInt local_size,coarse_size=0; 4672 PetscInt *local_primal_indices=NULL; 4673 const PetscInt *t_local_primal_indices; 4674 PetscErrorCode ierr; 4675 4676 PetscFunctionBegin; 4677 /* Compute global number of coarse dofs */ 4678 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4679 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4680 } 4681 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4682 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4683 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4684 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4685 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4686 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4687 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4688 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4689 if (local_size != pcbddc->local_primal_size) { 4690 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4691 } 4692 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4693 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4694 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4695 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4696 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4697 4698 /* check numbering */ 4699 if (pcbddc->dbg_flag) { 4700 PetscScalar coarsesum,*array; 4701 PetscInt i; 4702 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4703 4704 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4705 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4706 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4707 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4708 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4709 for (i=0;i<pcbddc->local_primal_size;i++) { 4710 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4711 } 4712 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4713 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4714 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4715 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4716 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4717 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4718 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4719 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4720 for (i=0;i<pcis->n;i++) { 4721 if (array[i] == 1.0) { 4722 set_error = PETSC_TRUE; 4723 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4724 } 4725 } 4726 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4727 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4728 for (i=0;i<pcis->n;i++) { 4729 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4730 } 4731 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4732 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4733 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4734 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4735 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4736 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4737 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4738 PetscInt *gidxs; 4739 4740 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4741 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4742 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4743 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4744 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4745 for (i=0;i<pcbddc->local_primal_size;i++) { 4746 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4747 } 4748 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4749 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4750 } 4751 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4752 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4753 } 4754 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4755 /* get back data */ 4756 *coarse_size_n = coarse_size; 4757 *local_primal_indices_n = local_primal_indices; 4758 PetscFunctionReturn(0); 4759 } 4760 4761 #undef __FUNCT__ 4762 #define __FUNCT__ "PCBDDCGlobalToLocal" 4763 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4764 { 4765 IS localis_t; 4766 PetscInt i,lsize,*idxs,n; 4767 PetscScalar *vals; 4768 PetscErrorCode ierr; 4769 4770 PetscFunctionBegin; 4771 /* get indices in local ordering exploiting local to global map */ 4772 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4773 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4774 for (i=0;i<lsize;i++) vals[i] = 1.0; 4775 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4776 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4777 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4778 if (idxs) { /* multilevel guard */ 4779 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4780 } 4781 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4782 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4783 ierr = PetscFree(vals);CHKERRQ(ierr); 4784 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4785 /* now compute set in local ordering */ 4786 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4787 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4788 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4789 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4790 for (i=0,lsize=0;i<n;i++) { 4791 if (PetscRealPart(vals[i]) > 0.5) { 4792 lsize++; 4793 } 4794 } 4795 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4796 for (i=0,lsize=0;i<n;i++) { 4797 if (PetscRealPart(vals[i]) > 0.5) { 4798 idxs[lsize++] = i; 4799 } 4800 } 4801 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4802 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4803 *localis = localis_t; 4804 PetscFunctionReturn(0); 4805 } 4806 4807 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4808 #undef __FUNCT__ 4809 #define __FUNCT__ "PCBDDCMatMult_Private" 4810 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4811 { 4812 PCBDDCChange_ctx change_ctx; 4813 PetscErrorCode ierr; 4814 4815 PetscFunctionBegin; 4816 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4817 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4818 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4819 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4820 PetscFunctionReturn(0); 4821 } 4822 4823 #undef __FUNCT__ 4824 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4825 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4826 { 4827 PCBDDCChange_ctx change_ctx; 4828 PetscErrorCode ierr; 4829 4830 PetscFunctionBegin; 4831 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4832 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4833 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4834 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4835 PetscFunctionReturn(0); 4836 } 4837 4838 #undef __FUNCT__ 4839 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4840 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4841 { 4842 PC_IS *pcis=(PC_IS*)pc->data; 4843 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4844 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4845 Mat S_j; 4846 PetscInt *used_xadj,*used_adjncy; 4847 PetscBool free_used_adj; 4848 PetscErrorCode ierr; 4849 4850 PetscFunctionBegin; 4851 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4852 free_used_adj = PETSC_FALSE; 4853 if (pcbddc->sub_schurs_layers == -1) { 4854 used_xadj = NULL; 4855 used_adjncy = NULL; 4856 } else { 4857 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4858 used_xadj = pcbddc->mat_graph->xadj; 4859 used_adjncy = pcbddc->mat_graph->adjncy; 4860 } else if (pcbddc->computed_rowadj) { 4861 used_xadj = pcbddc->mat_graph->xadj; 4862 used_adjncy = pcbddc->mat_graph->adjncy; 4863 } else { 4864 PetscBool flg_row=PETSC_FALSE; 4865 const PetscInt *xadj,*adjncy; 4866 PetscInt nvtxs; 4867 4868 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4869 if (flg_row) { 4870 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4871 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4872 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4873 free_used_adj = PETSC_TRUE; 4874 } else { 4875 pcbddc->sub_schurs_layers = -1; 4876 used_xadj = NULL; 4877 used_adjncy = NULL; 4878 } 4879 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4880 } 4881 } 4882 4883 /* setup sub_schurs data */ 4884 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4885 if (!sub_schurs->use_mumps) { 4886 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4887 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4888 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 4889 } else { 4890 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4891 PetscBool isseqaij; 4892 if (!pcbddc->use_vertices && reuse_solvers) { 4893 PetscInt n_vertices; 4894 4895 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4896 reuse_solvers = (PetscBool)!n_vertices; 4897 } 4898 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4899 if (!isseqaij) { 4900 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4901 if (matis->A == pcbddc->local_mat) { 4902 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4903 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4904 } else { 4905 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4906 } 4907 } 4908 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 4909 } 4910 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4911 4912 /* free adjacency */ 4913 if (free_used_adj) { 4914 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4915 } 4916 PetscFunctionReturn(0); 4917 } 4918 4919 #undef __FUNCT__ 4920 #define __FUNCT__ "PCBDDCInitSubSchurs" 4921 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4922 { 4923 PC_IS *pcis=(PC_IS*)pc->data; 4924 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4925 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4926 PCBDDCGraph graph; 4927 PetscErrorCode ierr; 4928 4929 PetscFunctionBegin; 4930 /* attach interface graph for determining subsets */ 4931 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4932 IS verticesIS,verticescomm; 4933 PetscInt vsize,*idxs; 4934 4935 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4936 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4937 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4938 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4939 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4940 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4941 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4942 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4943 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4944 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4945 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4946 /* 4947 if (pcbddc->dbg_flag) { 4948 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4949 } 4950 */ 4951 } else { 4952 graph = pcbddc->mat_graph; 4953 } 4954 4955 /* sub_schurs init */ 4956 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4957 4958 /* free graph struct */ 4959 if (pcbddc->sub_schurs_rebuild) { 4960 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4961 } 4962 PetscFunctionReturn(0); 4963 } 4964