1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 thresh = 1.0; 92 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 93 #if defined(PETSC_USE_COMPLEX) 94 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 95 #else 96 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 97 #endif 98 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 99 ierr = PetscFPTrapPop();CHKERRQ(ierr); 100 } else { 101 /* TODO */ 102 } 103 } else { 104 lwork = 0; 105 } 106 107 nv = 0; 108 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 109 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 110 } 111 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 112 if (allocated_S_St) { 113 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 114 } 115 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 116 #if defined(PETSC_USE_COMPLEX) 117 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 118 #endif 119 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 121 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 122 nv+cum,&pcbddc->adaptive_constraints_idxs, 123 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 124 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 125 126 maxneigs = 0; 127 cum = cum2 = cumarray = 0; 128 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 129 pcbddc->adaptive_constraints_data_ptr[0] = 0; 130 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 131 const PetscInt *idxs; 132 133 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 134 for (cum=0;cum<nv;cum++) { 135 pcbddc->adaptive_constraints_n[cum] = 1; 136 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 137 pcbddc->adaptive_constraints_data[cum] = 1.0; 138 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 139 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 140 } 141 cum2 = cum; 142 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 143 } 144 145 if (mss) { /* multilevel */ 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 147 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 148 } 149 150 for (i=0;i<sub_schurs->n_subs;i++) { 151 152 const PetscInt *idxs; 153 PetscReal infty = PETSC_MAX_REAL; 154 PetscInt j,subset_size,eigs_start = 0; 155 PetscBLASInt B_N; 156 PetscBool same_data = PETSC_FALSE; 157 158 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 159 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 160 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 161 if (sub_schurs->is_hermitian) { 162 PetscInt j,k; 163 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 164 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 for (j=0;j<subset_size;j++) { 168 for (k=j;k<subset_size;k++) { 169 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 170 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 171 } 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 183 /* see if we can save some work */ 184 if (sub_schurs->n_subs == 1) { 185 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 186 } 187 188 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 189 B_neigs = 0; 190 } else { 191 /* Threshold: this is an heuristic for edges */ 192 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 193 194 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 195 PetscBLASInt B_itype = 1; 196 PetscBLASInt B_IL, B_IU; 197 PetscReal eps = -1.0; /* dlamch? */ 198 PetscInt nmin_s; 199 200 /* ask for eigenvalues larger than thresh */ 201 if (pcbddc->dbg_flag) { 202 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 203 } 204 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 205 #if defined(PETSC_USE_COMPLEX) 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 207 #else 208 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 209 #endif 210 ierr = PetscFPTrapPop();CHKERRQ(ierr); 211 if (B_ierr) { 212 if (B_ierr < 0 ) { 213 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 214 } else if (B_ierr <= B_N) { 215 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 216 } else { 217 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 218 } 219 } 220 221 if (B_neigs > nmax) { 222 if (pcbddc->dbg_flag) { 223 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 224 } 225 eigs_start = B_neigs -nmax; 226 B_neigs = nmax; 227 } 228 229 nmin_s = PetscMin(nmin,B_N); 230 if (B_neigs < nmin_s) { 231 PetscBLASInt B_neigs2; 232 233 B_IU = B_N - B_neigs; 234 B_IL = B_N - nmin_s + 1; 235 if (pcbddc->dbg_flag) { 236 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 237 } 238 if (sub_schurs->is_hermitian) { 239 PetscInt j; 240 for (j=0;j<subset_size;j++) { 241 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 242 } 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 } else { 247 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 248 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 249 } 250 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 251 #if defined(PETSC_USE_COMPLEX) 252 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 253 #else 254 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 255 #endif 256 ierr = PetscFPTrapPop();CHKERRQ(ierr); 257 B_neigs += B_neigs2; 258 } 259 if (B_ierr) { 260 if (B_ierr < 0 ) { 261 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 262 } else if (B_ierr <= B_N) { 263 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 264 } else { 265 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 266 } 267 } 268 if (pcbddc->dbg_flag) { 269 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 270 for (j=0;j<B_neigs;j++) { 271 if (eigs[j] == 0.0) { 272 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 273 } else { 274 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 275 } 276 } 277 } 278 } else { 279 /* TODO */ 280 } 281 } 282 maxneigs = PetscMax(B_neigs,maxneigs); 283 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 284 if (B_neigs) { 285 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 286 287 if (pcbddc->dbg_flag > 1) { 288 PetscInt ii; 289 for (ii=0;ii<B_neigs;ii++) { 290 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 291 for (j=0;j<B_N;j++) { 292 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 293 } 294 } 295 } 296 #if 0 297 for (j=0;j<B_neigs;j++) { 298 PetscBLASInt Blas_N,Blas_one = 1.0; 299 PetscScalar norm; 300 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 301 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 302 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 303 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 304 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 305 } else { 306 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 307 } 308 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 309 } 310 #endif 311 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 312 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 313 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 314 cum++; 315 } 316 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 317 /* shift for next computation */ 318 cumarray += subset_size*subset_size; 319 } 320 if (pcbddc->dbg_flag) { 321 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 322 } 323 324 if (mss) { 325 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 326 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 327 /* destroy matrices (junk) */ 328 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 329 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 330 } 331 if (allocated_S_St) { 332 ierr = PetscFree2(S,St);CHKERRQ(ierr); 333 } 334 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 335 #if defined(PETSC_USE_COMPLEX) 336 ierr = PetscFree(rwork);CHKERRQ(ierr); 337 #endif 338 if (pcbddc->dbg_flag) { 339 PetscInt maxneigs_r; 340 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 341 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 342 } 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCSetUpSolvers" 348 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 349 { 350 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 351 PetscScalar *coarse_submat_vals; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 /* Setup local scatters R_to_B and (optionally) R_to_D */ 356 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 357 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 358 359 /* Setup local neumann solver ksp_R */ 360 /* PCBDDCSetUpLocalScatters should be called first! */ 361 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 362 363 /* Change global null space passed in by the user if change of basis has been requested */ 364 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 365 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 366 } 367 368 /* 369 Setup local correction and local part of coarse basis. 370 Gives back the dense local part of the coarse matrix in column major ordering 371 */ 372 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 373 374 /* Compute total number of coarse nodes and setup coarse solver */ 375 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 376 377 /* free */ 378 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "PCBDDCResetCustomization" 384 PetscErrorCode PCBDDCResetCustomization(PC pc) 385 { 386 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 387 PetscErrorCode ierr; 388 389 PetscFunctionBegin; 390 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 392 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 393 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 394 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 395 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 396 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 397 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 398 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 399 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 400 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "PCBDDCResetTopography" 406 PetscErrorCode PCBDDCResetTopography(PC pc) 407 { 408 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 409 PetscErrorCode ierr; 410 411 PetscFunctionBegin; 412 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 413 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 414 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 415 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 416 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "PCBDDCResetSolvers" 422 PetscErrorCode PCBDDCResetSolvers(PC pc) 423 { 424 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 425 PetscScalar *array; 426 PetscErrorCode ierr; 427 428 PetscFunctionBegin; 429 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 430 if (pcbddc->coarse_phi_B) { 431 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 432 ierr = PetscFree(array);CHKERRQ(ierr); 433 } 434 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 435 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 436 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 437 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 438 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 440 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 442 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 443 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 444 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 445 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 446 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 447 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 448 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 449 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 450 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 451 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 452 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 453 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 454 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 455 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 456 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 462 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 463 { 464 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 465 PC_IS *pcis = (PC_IS*)pc->data; 466 VecType impVecType; 467 PetscInt n_constraints,n_R,old_size; 468 PetscErrorCode ierr; 469 470 PetscFunctionBegin; 471 if (!pcbddc->ConstraintMatrix) { 472 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 473 } 474 /* get sizes */ 475 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 476 n_R = pcis->n-pcbddc->n_vertices; 477 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 478 /* local work vectors (try to avoid unneeded work)*/ 479 /* R nodes */ 480 old_size = -1; 481 if (pcbddc->vec1_R) { 482 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 483 } 484 if (n_R != old_size) { 485 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 486 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 487 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 488 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 489 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 490 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 491 } 492 /* local primal dofs */ 493 old_size = -1; 494 if (pcbddc->vec1_P) { 495 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 496 } 497 if (pcbddc->local_primal_size != old_size) { 498 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 499 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 500 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 501 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 502 } 503 /* local explicit constraints */ 504 old_size = -1; 505 if (pcbddc->vec1_C) { 506 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 507 } 508 if (n_constraints && n_constraints != old_size) { 509 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 510 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 511 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 512 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 513 } 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "PCBDDCSetUpCorrection" 519 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 520 { 521 PetscErrorCode ierr; 522 /* pointers to pcis and pcbddc */ 523 PC_IS* pcis = (PC_IS*)pc->data; 524 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 525 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 526 /* submatrices of local problem */ 527 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 528 /* submatrices of local coarse problem */ 529 Mat S_VV,S_CV,S_VC,S_CC; 530 /* working matrices */ 531 Mat C_CR; 532 /* additional working stuff */ 533 PC pc_R; 534 Mat F; 535 PetscBool isLU,isCHOL,isILU; 536 537 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 538 PetscScalar *work; 539 PetscInt *idx_V_B; 540 PetscInt n,n_vertices,n_constraints; 541 PetscInt i,n_R,n_D,n_B; 542 PetscBool unsymmetric_check; 543 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 544 MatType impMatType; 545 /* some shortcuts to scalars */ 546 PetscScalar one=1.0,m_one=-1.0; 547 548 PetscFunctionBegin; 549 n_vertices = pcbddc->n_vertices; 550 n_constraints = pcbddc->local_primal_size-n_vertices; 551 /* Set Non-overlapping dimensions */ 552 n_B = pcis->n_B; 553 n_D = pcis->n - n_B; 554 n_R = pcis->n - n_vertices; 555 556 /* Set types for local objects needed by BDDC precondtioner */ 557 impMatType = MATSEQDENSE; 558 559 /* vertices in boundary numbering */ 560 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 561 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 562 if (i != n_vertices) { 563 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 564 } 565 566 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 567 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 568 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 569 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 571 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 572 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 573 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 575 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 576 577 unsymmetric_check = PETSC_FALSE; 578 /* allocate workspace */ 579 n = 0; 580 if (n_constraints) { 581 n += n_R*n_constraints; 582 } 583 if (n_vertices) { 584 n = PetscMax(2*n_R*n_vertices,n); 585 n = PetscMax((n_R+n_B)*n_vertices,n); 586 } 587 if (!pcbddc->symmetric_primal) { 588 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 589 unsymmetric_check = PETSC_TRUE; 590 } 591 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 592 593 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 594 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 595 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 596 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 597 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 598 if (isLU || isILU || isCHOL) { 599 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 600 } else if (sub_schurs->reuse_mumps) { 601 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 602 MatFactorType type; 603 604 F = reuse_mumps->F; 605 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 606 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 607 } else { 608 F = NULL; 609 } 610 611 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 612 if (n_constraints) { 613 Mat M1,M2,M3; 614 Mat auxmat; 615 IS is_aux; 616 PetscScalar *array,*array2; 617 618 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 619 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 620 621 /* Extract constraints on R nodes: C_{CR} */ 622 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 623 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 624 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 625 626 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 627 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 628 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 629 for (i=0;i<n_constraints;i++) { 630 const PetscScalar *row_cmat_values; 631 const PetscInt *row_cmat_indices; 632 PetscInt size_of_constraint,j; 633 634 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 635 for (j=0;j<size_of_constraint;j++) { 636 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 637 } 638 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 639 } 640 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 641 if (F) { 642 Mat B; 643 644 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 645 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 646 ierr = MatDestroy(&B);CHKERRQ(ierr); 647 } else { 648 PetscScalar *marr; 649 650 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 651 for (i=0;i<n_constraints;i++) { 652 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 653 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 654 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 655 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 656 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 657 } 658 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 659 } 660 if (!pcbddc->switch_static) { 661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 662 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 663 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 664 for (i=0;i<n_constraints;i++) { 665 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 666 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 667 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 669 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 670 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 671 } 672 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 673 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 674 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 675 } else { 676 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 677 pcbddc->local_auxmat2 = local_auxmat2_R; 678 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 679 } 680 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 681 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 682 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 683 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 684 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 685 if (isCHOL) { 686 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 687 } else { 688 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 689 } 690 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 691 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 692 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 693 ierr = MatDestroy(&M2);CHKERRQ(ierr); 694 ierr = MatDestroy(&M3);CHKERRQ(ierr); 695 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 696 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 697 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 698 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 699 ierr = MatDestroy(&M1);CHKERRQ(ierr); 700 } 701 /* Get submatrices from subdomain matrix */ 702 if (n_vertices) { 703 Mat newmat; 704 IS is_aux; 705 PetscInt ibs,mbs; 706 PetscBool issbaij; 707 708 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 709 IS tis; 710 711 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 712 ierr = ISSort(tis);CHKERRQ(ierr); 713 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 714 ierr = ISDestroy(&tis);CHKERRQ(ierr); 715 } else { 716 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 717 } 718 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 719 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 720 if (ibs != mbs) { /* need to convert to SEQAIJ */ 721 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 722 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 723 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 724 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 725 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 726 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 727 } else { 728 /* this is safe */ 729 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 730 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 731 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 732 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 733 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 734 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 735 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 736 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 737 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 738 } else { 739 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 740 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 741 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 742 } 743 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 744 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 745 } 746 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 747 } 748 749 /* Matrix of coarse basis functions (local) */ 750 if (pcbddc->coarse_phi_B) { 751 PetscInt on_B,on_primal,on_D=n_D; 752 if (pcbddc->coarse_phi_D) { 753 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 754 } 755 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 756 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 757 PetscScalar *marray; 758 759 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 760 ierr = PetscFree(marray);CHKERRQ(ierr); 761 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 762 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 763 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 764 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 765 } 766 } 767 768 if (!pcbddc->coarse_phi_B) { 769 PetscScalar *marray; 770 771 n = n_B*pcbddc->local_primal_size; 772 if (pcbddc->switch_static || pcbddc->dbg_flag) { 773 n += n_D*pcbddc->local_primal_size; 774 } 775 if (!pcbddc->symmetric_primal) { 776 n *= 2; 777 } 778 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 780 n = n_B*pcbddc->local_primal_size; 781 if (pcbddc->switch_static || pcbddc->dbg_flag) { 782 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 783 n += n_D*pcbddc->local_primal_size; 784 } 785 if (!pcbddc->symmetric_primal) { 786 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 787 if (pcbddc->switch_static || pcbddc->dbg_flag) { 788 n = n_B*pcbddc->local_primal_size; 789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 793 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 794 if (pcbddc->switch_static || pcbddc->dbg_flag) { 795 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 796 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 797 } 798 } 799 } 800 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 801 /* vertices */ 802 if (n_vertices) { 803 804 if (n_R) { 805 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 806 PetscBLASInt B_N,B_one = 1; 807 PetscScalar *x,*y; 808 809 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 810 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 811 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 812 if (F) { /* TODO could be optimized for symmetric problems */ 813 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 814 } else { 815 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 816 for (i=0;i<n_vertices;i++) { 817 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 818 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 819 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 820 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 821 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 822 } 823 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 824 } 825 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 826 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 827 if (n_constraints) { 828 Mat B; 829 830 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 831 for (i=0;i<n_vertices;i++) { 832 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 837 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 838 } 839 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 840 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 841 ierr = MatDestroy(&B);CHKERRQ(ierr); 842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 843 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 844 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 845 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 846 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 847 ierr = MatDestroy(&B);CHKERRQ(ierr); 848 } 849 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 850 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 851 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 852 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 853 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 854 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 855 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 856 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 857 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 858 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 859 } else { 860 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 861 } 862 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 863 /* coarse basis functions */ 864 for (i=0;i<n_vertices;i++) { 865 PetscScalar *y; 866 867 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 868 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 869 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 870 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 y[n_B*i+idx_V_B[i]] = 1.0; 873 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 874 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 875 876 if (pcbddc->switch_static || pcbddc->dbg_flag) { 877 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 878 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 879 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 882 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 883 } 884 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 885 } 886 } 887 888 if (n_constraints) { 889 Mat B; 890 891 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 892 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 893 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 894 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 895 if (n_vertices) { 896 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 897 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 898 } else { 899 Mat S_VCt; 900 901 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 902 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 903 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 904 } 905 } 906 ierr = MatDestroy(&B);CHKERRQ(ierr); 907 /* coarse basis functions */ 908 for (i=0;i<n_constraints;i++) { 909 PetscScalar *y; 910 911 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 912 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 917 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 918 if (pcbddc->switch_static || pcbddc->dbg_flag) { 919 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 920 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 921 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 924 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 925 } 926 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 927 } 928 } 929 if (n_constraints) { 930 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 931 } 932 933 /* compute other basis functions for non-symmetric problems */ 934 if (!pcbddc->symmetric_primal) { 935 936 if (n_constraints) { 937 Mat S_CCT,B_C; 938 939 /* this is a lazy thing */ 940 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 942 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 943 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 944 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 945 if (n_vertices) { 946 Mat B_V,S_VCT; 947 948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 949 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 950 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 951 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 952 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 953 } 954 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 955 } else { /* if there are no constraints, reset work */ 956 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 957 } 958 if (n_vertices && n_R) { 959 Mat A_VRT; 960 PetscScalar *marray; 961 PetscBLASInt B_N,B_one = 1; 962 963 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 964 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 965 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 966 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 967 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 968 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 969 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 970 } 971 972 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 973 for (i=0;i<pcbddc->local_primal_size;i++) { 974 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 975 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 976 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 977 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 978 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 979 } 980 } else { 981 for (i=0;i<pcbddc->local_primal_size;i++) { 982 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 983 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 984 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 985 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 986 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 987 } 988 } 989 /* coarse basis functions */ 990 for (i=0;i<pcbddc->local_primal_size;i++) { 991 PetscScalar *y; 992 993 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 994 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 995 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 996 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 if (i<n_vertices) { 999 y[n_B*i+idx_V_B[i]] = 1.0; 1000 } 1001 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1002 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1003 1004 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1005 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1006 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1007 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1010 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1011 } 1012 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1013 } 1014 } 1015 /* free memory */ 1016 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1017 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1018 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1019 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1020 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1021 ierr = PetscFree(work);CHKERRQ(ierr); 1022 if (n_vertices) { 1023 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1024 } 1025 if (n_constraints) { 1026 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1027 } 1028 /* Checking coarse_sub_mat and coarse basis functios */ 1029 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1030 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1031 if (pcbddc->dbg_flag) { 1032 Mat coarse_sub_mat; 1033 Mat AUXMAT,TM1,TM2,TM3,TM4; 1034 Mat coarse_phi_D,coarse_phi_B; 1035 Mat coarse_psi_D,coarse_psi_B; 1036 Mat A_II,A_BB,A_IB,A_BI; 1037 Mat C_B,CPHI; 1038 IS is_dummy; 1039 Vec mones; 1040 MatType checkmattype=MATSEQAIJ; 1041 PetscReal real_value; 1042 1043 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1044 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1045 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1046 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1047 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1048 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1049 if (unsymmetric_check) { 1050 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1051 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1052 } 1053 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1054 1055 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1056 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1057 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1058 if (unsymmetric_check) { 1059 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1060 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1061 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1063 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1064 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1066 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1067 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1069 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1070 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1071 } else { 1072 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1073 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1074 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1075 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1076 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1077 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1078 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1079 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1080 } 1081 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1082 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1083 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1084 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1085 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1086 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1087 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1089 1090 /* check constraints */ 1091 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1092 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1093 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1094 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1095 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1096 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1097 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1098 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1099 if (unsymmetric_check) { 1100 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1101 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1102 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1103 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1105 } 1106 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1107 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1108 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1109 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1110 1111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1112 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1113 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1114 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1115 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1116 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1117 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1118 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1119 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1120 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1121 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1122 if (unsymmetric_check) { 1123 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1124 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1125 } 1126 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1127 } 1128 /* get back data */ 1129 *coarse_submat_vals_n = coarse_submat_vals; 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1135 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1136 { 1137 Mat *work_mat; 1138 IS isrow_s,iscol_s; 1139 PetscBool rsorted,csorted; 1140 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1141 PetscErrorCode ierr; 1142 1143 PetscFunctionBegin; 1144 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1145 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1146 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1147 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1148 1149 if (!rsorted) { 1150 const PetscInt *idxs; 1151 PetscInt *idxs_sorted,i; 1152 1153 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1154 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1155 for (i=0;i<rsize;i++) { 1156 idxs_perm_r[i] = i; 1157 } 1158 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1159 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1160 for (i=0;i<rsize;i++) { 1161 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1162 } 1163 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1164 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1165 } else { 1166 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1167 isrow_s = isrow; 1168 } 1169 1170 if (!csorted) { 1171 if (isrow == iscol) { 1172 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1173 iscol_s = isrow_s; 1174 } else { 1175 const PetscInt *idxs; 1176 PetscInt *idxs_sorted,i; 1177 1178 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1179 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1180 for (i=0;i<csize;i++) { 1181 idxs_perm_c[i] = i; 1182 } 1183 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1184 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1185 for (i=0;i<csize;i++) { 1186 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1187 } 1188 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1189 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1190 } 1191 } else { 1192 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1193 iscol_s = iscol; 1194 } 1195 1196 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1197 1198 if (!rsorted || !csorted) { 1199 Mat new_mat; 1200 IS is_perm_r,is_perm_c; 1201 1202 if (!rsorted) { 1203 PetscInt *idxs_r,i; 1204 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1205 for (i=0;i<rsize;i++) { 1206 idxs_r[idxs_perm_r[i]] = i; 1207 } 1208 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1209 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1210 } else { 1211 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1212 } 1213 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1214 1215 if (!csorted) { 1216 if (isrow_s == iscol_s) { 1217 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1218 is_perm_c = is_perm_r; 1219 } else { 1220 PetscInt *idxs_c,i; 1221 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1222 for (i=0;i<csize;i++) { 1223 idxs_c[idxs_perm_c[i]] = i; 1224 } 1225 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1226 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1227 } 1228 } else { 1229 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1230 } 1231 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1232 1233 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1234 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1235 work_mat[0] = new_mat; 1236 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1237 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1238 } 1239 1240 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1241 *B = work_mat[0]; 1242 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1243 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1244 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1250 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1251 { 1252 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1253 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1254 Mat new_mat; 1255 IS is_local,is_global; 1256 PetscInt local_size; 1257 PetscBool isseqaij; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1262 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1263 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1264 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1265 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1266 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1267 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1268 1269 /* check */ 1270 if (pcbddc->dbg_flag) { 1271 Vec x,x_change; 1272 PetscReal error; 1273 1274 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1275 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1276 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1277 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1279 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1280 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1281 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1282 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1283 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1286 ierr = VecDestroy(&x);CHKERRQ(ierr); 1287 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1288 } 1289 1290 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1291 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1292 if (isseqaij) { 1293 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1294 } else { 1295 Mat work_mat; 1296 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1297 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1298 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1299 } 1300 if (matis->A->symmetric_set) { 1301 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1302 #if !defined(PETSC_USE_COMPLEX) 1303 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1304 #endif 1305 } 1306 /* 1307 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1308 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1309 */ 1310 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1311 PetscFunctionReturn(0); 1312 } 1313 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1316 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1317 { 1318 PC_IS* pcis = (PC_IS*)(pc->data); 1319 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1320 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1321 PetscInt *idx_R_local=NULL; 1322 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1323 PetscInt vbs,bs; 1324 PetscBT bitmask=NULL; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 /* 1329 No need to setup local scatters if 1330 - primal space is unchanged 1331 AND 1332 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1333 AND 1334 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1335 */ 1336 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1337 PetscFunctionReturn(0); 1338 } 1339 /* destroy old objects */ 1340 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1341 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1342 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1343 /* Set Non-overlapping dimensions */ 1344 n_B = pcis->n_B; 1345 n_D = pcis->n - n_B; 1346 n_vertices = pcbddc->n_vertices; 1347 1348 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1349 1350 /* create auxiliary bitmask and allocate workspace */ 1351 if (!sub_schurs->reuse_mumps) { 1352 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1353 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1354 for (i=0;i<n_vertices;i++) { 1355 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1356 } 1357 1358 for (i=0, n_R=0; i<pcis->n; i++) { 1359 if (!PetscBTLookup(bitmask,i)) { 1360 idx_R_local[n_R++] = i; 1361 } 1362 } 1363 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1364 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1365 1366 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1367 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1368 } 1369 1370 /* Block code */ 1371 vbs = 1; 1372 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1373 if (bs>1 && !(n_vertices%bs)) { 1374 PetscBool is_blocked = PETSC_TRUE; 1375 PetscInt *vary; 1376 if (!sub_schurs->reuse_mumps) { 1377 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1378 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1379 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1380 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1381 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1382 for (i=0; i<pcis->n/bs; i++) { 1383 if (vary[i]!=0 && vary[i]!=bs) { 1384 is_blocked = PETSC_FALSE; 1385 break; 1386 } 1387 } 1388 ierr = PetscFree(vary);CHKERRQ(ierr); 1389 } else { 1390 /* Verify directly the R set */ 1391 for (i=0; i<n_R/bs; i++) { 1392 PetscInt j,node=idx_R_local[bs*i]; 1393 for (j=1; j<bs; j++) { 1394 if (node != idx_R_local[bs*i+j]-j) { 1395 is_blocked = PETSC_FALSE; 1396 break; 1397 } 1398 } 1399 } 1400 } 1401 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1402 vbs = bs; 1403 for (i=0;i<n_R/vbs;i++) { 1404 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1405 } 1406 } 1407 } 1408 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1409 if (sub_schurs->reuse_mumps) { 1410 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1411 1412 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1413 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1414 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1415 reuse_mumps->is_R = pcbddc->is_R_local; 1416 } else { 1417 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1418 } 1419 1420 /* print some info if requested */ 1421 if (pcbddc->dbg_flag) { 1422 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1423 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1424 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1425 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1426 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1427 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1428 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1429 } 1430 1431 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1432 if (!sub_schurs->reuse_mumps) { 1433 IS is_aux1,is_aux2; 1434 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1435 1436 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1437 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1438 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1439 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1440 for (i=0; i<n_D; i++) { 1441 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1442 } 1443 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1444 for (i=0, j=0; i<n_R; i++) { 1445 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1446 aux_array1[j++] = i; 1447 } 1448 } 1449 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1450 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1451 for (i=0, j=0; i<n_B; i++) { 1452 if (!PetscBTLookup(bitmask,is_indices[i])) { 1453 aux_array2[j++] = i; 1454 } 1455 } 1456 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1457 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1458 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1459 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1460 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1461 1462 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1463 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1464 for (i=0, j=0; i<n_R; i++) { 1465 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1466 aux_array1[j++] = i; 1467 } 1468 } 1469 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1470 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1471 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1472 } 1473 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1474 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1475 } else { 1476 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1477 IS tis; 1478 PetscInt schur_size; 1479 1480 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1481 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1482 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1483 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1484 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1485 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1486 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1487 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1488 } 1489 } 1490 PetscFunctionReturn(0); 1491 } 1492 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1496 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1497 { 1498 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1499 PC_IS *pcis = (PC_IS*)pc->data; 1500 PC pc_temp; 1501 Mat A_RR; 1502 MatReuse reuse; 1503 PetscScalar m_one = -1.0; 1504 PetscReal value; 1505 PetscInt n_D,n_R,ibs,mbs; 1506 PetscBool use_exact,use_exact_reduced,issbaij; 1507 PetscErrorCode ierr; 1508 /* prefixes stuff */ 1509 char dir_prefix[256],neu_prefix[256],str_level[16]; 1510 size_t len; 1511 1512 PetscFunctionBegin; 1513 1514 /* compute prefixes */ 1515 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1516 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1517 if (!pcbddc->current_level) { 1518 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1519 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1520 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1521 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1522 } else { 1523 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1524 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1525 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1526 len -= 15; /* remove "pc_bddc_coarse_" */ 1527 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1528 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1529 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1530 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1531 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1532 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1533 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1534 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1535 } 1536 1537 /* DIRICHLET PROBLEM */ 1538 if (dirichlet) { 1539 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1540 if (pcbddc->local_mat->symmetric_set) { 1541 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1542 } 1543 /* Matrix for Dirichlet problem is pcis->A_II */ 1544 n_D = pcis->n - pcis->n_B; 1545 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1546 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1547 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1548 /* default */ 1549 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1550 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1551 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1552 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1553 if (issbaij) { 1554 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1555 } else { 1556 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1557 } 1558 /* Allow user's customization */ 1559 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1560 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1561 } 1562 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1563 if (sub_schurs->reuse_mumps) { 1564 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1565 1566 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1567 } 1568 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1569 if (!n_D) { 1570 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1571 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1572 } 1573 /* Set Up KSP for Dirichlet problem of BDDC */ 1574 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1575 /* set ksp_D into pcis data */ 1576 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1577 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1578 pcis->ksp_D = pcbddc->ksp_D; 1579 } 1580 1581 /* NEUMANN PROBLEM */ 1582 A_RR = 0; 1583 if (neumann) { 1584 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1585 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1586 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1587 if (pcbddc->ksp_R) { /* already created ksp */ 1588 PetscInt nn_R; 1589 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1590 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1591 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1592 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1593 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1594 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1595 reuse = MAT_INITIAL_MATRIX; 1596 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1597 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1598 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1599 reuse = MAT_INITIAL_MATRIX; 1600 } else { /* safe to reuse the matrix */ 1601 reuse = MAT_REUSE_MATRIX; 1602 } 1603 } 1604 /* last check */ 1605 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1606 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1607 reuse = MAT_INITIAL_MATRIX; 1608 } 1609 } else { /* first time, so we need to create the matrix */ 1610 reuse = MAT_INITIAL_MATRIX; 1611 } 1612 /* extract A_RR */ 1613 if (!sub_schurs->reuse_mumps) { 1614 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1615 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1616 if (ibs != mbs) { 1617 Mat newmat; 1618 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1619 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1620 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1621 } else { 1622 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1623 } 1624 if (pcbddc->local_mat->symmetric_set) { 1625 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1626 } 1627 } else { 1628 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1629 1630 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1631 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1632 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1633 } 1634 if (!pcbddc->ksp_R) { /* create object if not present */ 1635 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1636 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1637 /* default */ 1638 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1639 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1640 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1641 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1642 if (issbaij) { 1643 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1644 } else { 1645 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1646 } 1647 /* Allow user's customization */ 1648 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1649 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1650 } 1651 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1652 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1653 if (!n_R) { 1654 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1655 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1656 } 1657 /* Reuse MUMPS solver if it is present */ 1658 if (sub_schurs->reuse_mumps) { 1659 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1660 1661 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1662 } 1663 /* Set Up KSP for Neumann problem of BDDC */ 1664 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1665 } 1666 /* free Neumann problem's matrix */ 1667 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1668 1669 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1670 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1671 if (pcbddc->dbg_flag) { 1672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1673 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1674 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1675 } 1676 if (dirichlet) { /* Dirichlet */ 1677 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1678 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1679 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1680 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1681 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1682 /* need to be adapted? */ 1683 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1684 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1685 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1686 /* print info */ 1687 if (pcbddc->dbg_flag) { 1688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1689 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1690 } 1691 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1692 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1693 } 1694 } 1695 if (neumann) { /* Neumann */ 1696 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1697 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1698 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1699 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1700 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1701 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1702 /* need to be adapted? */ 1703 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1704 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1705 /* print info */ 1706 if (pcbddc->dbg_flag) { 1707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1708 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1709 } 1710 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1711 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1712 } 1713 } 1714 } 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1720 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1721 { 1722 PetscErrorCode ierr; 1723 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1724 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1725 1726 PetscFunctionBegin; 1727 if (!sub_schurs->reuse_mumps) { 1728 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1729 } 1730 if (!pcbddc->switch_static) { 1731 if (applytranspose && pcbddc->local_auxmat1) { 1732 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1733 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1734 } 1735 if (!sub_schurs->reuse_mumps) { 1736 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1737 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1738 } else { 1739 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1740 1741 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1742 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1743 } 1744 } else { 1745 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1746 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1747 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1748 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1749 if (applytranspose && pcbddc->local_auxmat1) { 1750 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1751 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1752 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1753 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1754 } 1755 } 1756 if (!sub_schurs->reuse_mumps) { 1757 if (applytranspose) { 1758 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1759 } else { 1760 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1761 } 1762 #if defined(PETSC_HAVE_MUMPS) 1763 } else { 1764 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1765 1766 if (applytranspose) { 1767 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1768 } else { 1769 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1770 } 1771 #endif 1772 } 1773 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1774 if (!pcbddc->switch_static) { 1775 if (!sub_schurs->reuse_mumps) { 1776 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1778 } else { 1779 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1780 1781 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1782 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1783 } 1784 if (!applytranspose && pcbddc->local_auxmat1) { 1785 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1786 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1787 } 1788 } else { 1789 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1790 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1791 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1792 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 if (!applytranspose && pcbddc->local_auxmat1) { 1794 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1795 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1796 } 1797 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1799 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1800 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 } 1802 PetscFunctionReturn(0); 1803 } 1804 1805 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1808 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1809 { 1810 PetscErrorCode ierr; 1811 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1812 PC_IS* pcis = (PC_IS*) (pc->data); 1813 const PetscScalar zero = 0.0; 1814 1815 PetscFunctionBegin; 1816 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1817 if (applytranspose) { 1818 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1819 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1820 } else { 1821 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1822 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1823 } 1824 /* start communications from local primal nodes to rhs of coarse solver */ 1825 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1826 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1827 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1828 1829 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1830 /* TODO remove null space when doing multilevel */ 1831 if (pcbddc->coarse_ksp) { 1832 Vec rhs,sol; 1833 1834 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1835 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1836 if (applytranspose) { 1837 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1838 } else { 1839 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1840 } 1841 } 1842 1843 /* Local solution on R nodes */ 1844 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1845 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1846 } 1847 1848 /* communications from coarse sol to local primal nodes */ 1849 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1850 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1851 1852 /* Sum contributions from two levels */ 1853 if (applytranspose) { 1854 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1855 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1856 } else { 1857 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1858 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1859 } 1860 PetscFunctionReturn(0); 1861 } 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1865 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1866 { 1867 PetscErrorCode ierr; 1868 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1869 PetscScalar *array; 1870 Vec from,to; 1871 1872 PetscFunctionBegin; 1873 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1874 from = pcbddc->coarse_vec; 1875 to = pcbddc->vec1_P; 1876 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1877 Vec tvec; 1878 1879 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1880 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1881 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1882 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1883 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1884 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1885 } 1886 } else { /* from local to global -> put data in coarse right hand side */ 1887 from = pcbddc->vec1_P; 1888 to = pcbddc->coarse_vec; 1889 } 1890 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1896 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1897 { 1898 PetscErrorCode ierr; 1899 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1900 PetscScalar *array; 1901 Vec from,to; 1902 1903 PetscFunctionBegin; 1904 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1905 from = pcbddc->coarse_vec; 1906 to = pcbddc->vec1_P; 1907 } else { /* from local to global -> put data in coarse right hand side */ 1908 from = pcbddc->vec1_P; 1909 to = pcbddc->coarse_vec; 1910 } 1911 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1912 if (smode == SCATTER_FORWARD) { 1913 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1914 Vec tvec; 1915 1916 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1917 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1918 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1919 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1920 } 1921 } else { 1922 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1923 ierr = VecResetArray(from);CHKERRQ(ierr); 1924 } 1925 } 1926 PetscFunctionReturn(0); 1927 } 1928 1929 /* uncomment for testing purposes */ 1930 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1933 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1934 { 1935 PetscErrorCode ierr; 1936 PC_IS* pcis = (PC_IS*)(pc->data); 1937 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1938 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1939 /* one and zero */ 1940 PetscScalar one=1.0,zero=0.0; 1941 /* space to store constraints and their local indices */ 1942 PetscScalar *constraints_data; 1943 PetscInt *constraints_idxs,*constraints_idxs_B; 1944 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1945 PetscInt *constraints_n; 1946 /* iterators */ 1947 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1948 /* BLAS integers */ 1949 PetscBLASInt lwork,lierr; 1950 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1951 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1952 /* reuse */ 1953 PetscInt olocal_primal_size,olocal_primal_size_cc; 1954 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1955 /* change of basis */ 1956 PetscBool qr_needed; 1957 PetscBT change_basis,qr_needed_idx; 1958 /* auxiliary stuff */ 1959 PetscInt *nnz,*is_indices; 1960 PetscInt ncc; 1961 /* some quantities */ 1962 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1963 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1964 1965 PetscFunctionBegin; 1966 /* Destroy Mat objects computed previously */ 1967 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1968 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1969 /* save info on constraints from previous setup (if any) */ 1970 olocal_primal_size = pcbddc->local_primal_size; 1971 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1972 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1973 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1974 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1975 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1976 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1977 1978 /* print some info */ 1979 if (pcbddc->dbg_flag) { 1980 IS vertices; 1981 PetscInt nv,nedges,nfaces; 1982 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1983 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1984 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1985 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1986 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1987 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1990 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1991 } 1992 1993 if (!pcbddc->adaptive_selection) { 1994 IS ISForVertices,*ISForFaces,*ISForEdges; 1995 MatNullSpace nearnullsp; 1996 const Vec *nearnullvecs; 1997 Vec *localnearnullsp; 1998 PetscScalar *array; 1999 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2000 PetscBool nnsp_has_cnst; 2001 /* LAPACK working arrays for SVD or POD */ 2002 PetscBool skip_lapack,boolforchange; 2003 PetscScalar *work; 2004 PetscReal *singular_vals; 2005 #if defined(PETSC_USE_COMPLEX) 2006 PetscReal *rwork; 2007 #endif 2008 #if defined(PETSC_MISSING_LAPACK_GESVD) 2009 PetscScalar *temp_basis,*correlation_mat; 2010 #else 2011 PetscBLASInt dummy_int=1; 2012 PetscScalar dummy_scalar=1.; 2013 #endif 2014 2015 /* Get index sets for faces, edges and vertices from graph */ 2016 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2017 /* free unneeded index sets */ 2018 if (!pcbddc->use_vertices) { 2019 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2020 } 2021 if (!pcbddc->use_edges) { 2022 for (i=0;i<n_ISForEdges;i++) { 2023 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2024 } 2025 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2026 n_ISForEdges = 0; 2027 } 2028 if (!pcbddc->use_faces) { 2029 for (i=0;i<n_ISForFaces;i++) { 2030 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2031 } 2032 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2033 n_ISForFaces = 0; 2034 } 2035 2036 #if defined(PETSC_USE_DEBUG) 2037 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2038 Also use_change_of_basis should be consistent among processors */ 2039 if (pcbddc->NullSpace) { 2040 PetscBool tbool[2],gbool[2]; 2041 2042 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2043 pcbddc->use_change_of_basis = PETSC_TRUE; 2044 if (!ISForEdges) { 2045 pcbddc->use_change_on_faces = PETSC_TRUE; 2046 } 2047 } 2048 tbool[0] = pcbddc->use_change_of_basis; 2049 tbool[1] = pcbddc->use_change_on_faces; 2050 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2051 pcbddc->use_change_of_basis = gbool[0]; 2052 pcbddc->use_change_on_faces = gbool[1]; 2053 } 2054 #endif 2055 2056 /* check if near null space is attached to global mat */ 2057 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2058 if (nearnullsp) { 2059 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2060 /* remove any stored info */ 2061 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2062 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2063 /* store information for BDDC solver reuse */ 2064 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2065 pcbddc->onearnullspace = nearnullsp; 2066 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2067 for (i=0;i<nnsp_size;i++) { 2068 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2069 } 2070 } else { /* if near null space is not provided BDDC uses constants by default */ 2071 nnsp_size = 0; 2072 nnsp_has_cnst = PETSC_TRUE; 2073 } 2074 /* get max number of constraints on a single cc */ 2075 max_constraints = nnsp_size; 2076 if (nnsp_has_cnst) max_constraints++; 2077 2078 /* 2079 Evaluate maximum storage size needed by the procedure 2080 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2081 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2082 There can be multiple constraints per connected component 2083 */ 2084 n_vertices = 0; 2085 if (ISForVertices) { 2086 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2087 } 2088 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2089 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2090 2091 total_counts = n_ISForFaces+n_ISForEdges; 2092 total_counts *= max_constraints; 2093 total_counts += n_vertices; 2094 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2095 2096 total_counts = 0; 2097 max_size_of_constraint = 0; 2098 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2099 IS used_is; 2100 if (i<n_ISForEdges) { 2101 used_is = ISForEdges[i]; 2102 } else { 2103 used_is = ISForFaces[i-n_ISForEdges]; 2104 } 2105 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2106 total_counts += j; 2107 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2108 } 2109 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2110 2111 /* get local part of global near null space vectors */ 2112 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2113 for (k=0;k<nnsp_size;k++) { 2114 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2115 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2117 } 2118 2119 /* whether or not to skip lapack calls */ 2120 skip_lapack = PETSC_TRUE; 2121 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2122 2123 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2124 if (!skip_lapack) { 2125 PetscScalar temp_work; 2126 2127 #if defined(PETSC_MISSING_LAPACK_GESVD) 2128 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2129 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2130 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2131 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2132 #if defined(PETSC_USE_COMPLEX) 2133 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2134 #endif 2135 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2136 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2137 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2138 lwork = -1; 2139 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2140 #if !defined(PETSC_USE_COMPLEX) 2141 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2142 #else 2143 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2144 #endif 2145 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2146 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2147 #else /* on missing GESVD */ 2148 /* SVD */ 2149 PetscInt max_n,min_n; 2150 max_n = max_size_of_constraint; 2151 min_n = max_constraints; 2152 if (max_size_of_constraint < max_constraints) { 2153 min_n = max_size_of_constraint; 2154 max_n = max_constraints; 2155 } 2156 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2157 #if defined(PETSC_USE_COMPLEX) 2158 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2159 #endif 2160 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2161 lwork = -1; 2162 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2163 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2164 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2165 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2166 #if !defined(PETSC_USE_COMPLEX) 2167 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2168 #else 2169 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2170 #endif 2171 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2172 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2173 #endif /* on missing GESVD */ 2174 /* Allocate optimal workspace */ 2175 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2176 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2177 } 2178 /* Now we can loop on constraining sets */ 2179 total_counts = 0; 2180 constraints_idxs_ptr[0] = 0; 2181 constraints_data_ptr[0] = 0; 2182 /* vertices */ 2183 if (n_vertices) { 2184 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2185 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2186 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2187 for (i=0;i<n_vertices;i++) { 2188 constraints_n[total_counts] = 1; 2189 constraints_data[total_counts] = 1.0; 2190 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2191 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2192 total_counts++; 2193 } 2194 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2195 PetscBool used_vertex; 2196 for (i=0;i<n_vertices;i++) { 2197 used_vertex = PETSC_FALSE; 2198 k = 0; 2199 while (!used_vertex && k<nnsp_size) { 2200 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2201 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2202 constraints_n[total_counts] = 1; 2203 constraints_idxs[total_counts] = is_indices[i]; 2204 constraints_data[total_counts] = 1.0; 2205 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2206 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2207 total_counts++; 2208 used_vertex = PETSC_TRUE; 2209 } 2210 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2211 k++; 2212 } 2213 } 2214 } 2215 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2216 n_vertices = total_counts; 2217 } 2218 2219 /* edges and faces */ 2220 total_counts_cc = total_counts; 2221 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2222 IS used_is; 2223 PetscBool idxs_copied = PETSC_FALSE; 2224 2225 if (ncc<n_ISForEdges) { 2226 used_is = ISForEdges[ncc]; 2227 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2228 } else { 2229 used_is = ISForFaces[ncc-n_ISForEdges]; 2230 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2231 } 2232 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2233 2234 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2235 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2236 /* change of basis should not be performed on local periodic nodes */ 2237 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2238 if (nnsp_has_cnst) { 2239 PetscScalar quad_value; 2240 2241 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2242 idxs_copied = PETSC_TRUE; 2243 2244 if (!pcbddc->use_nnsp_true) { 2245 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2246 } else { 2247 quad_value = 1.0; 2248 } 2249 for (j=0;j<size_of_constraint;j++) { 2250 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2251 } 2252 temp_constraints++; 2253 total_counts++; 2254 } 2255 for (k=0;k<nnsp_size;k++) { 2256 PetscReal real_value; 2257 PetscScalar *ptr_to_data; 2258 2259 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2260 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2261 for (j=0;j<size_of_constraint;j++) { 2262 ptr_to_data[j] = array[is_indices[j]]; 2263 } 2264 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2265 /* check if array is null on the connected component */ 2266 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2267 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2268 if (real_value > 0.0) { /* keep indices and values */ 2269 temp_constraints++; 2270 total_counts++; 2271 if (!idxs_copied) { 2272 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2273 idxs_copied = PETSC_TRUE; 2274 } 2275 } 2276 } 2277 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2278 valid_constraints = temp_constraints; 2279 if (!pcbddc->use_nnsp_true && temp_constraints) { 2280 if (temp_constraints == 1) { /* just normalize the constraint */ 2281 PetscScalar norm,*ptr_to_data; 2282 2283 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2284 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2285 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2286 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2287 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2288 } else { /* perform SVD */ 2289 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2290 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2291 2292 #if defined(PETSC_MISSING_LAPACK_GESVD) 2293 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2294 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2295 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2296 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2297 from that computed using LAPACKgesvd 2298 -> This is due to a different computation of eigenvectors in LAPACKheev 2299 -> The quality of the POD-computed basis will be the same */ 2300 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2301 /* Store upper triangular part of correlation matrix */ 2302 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2303 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2304 for (j=0;j<temp_constraints;j++) { 2305 for (k=0;k<j+1;k++) { 2306 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2307 } 2308 } 2309 /* compute eigenvalues and eigenvectors of correlation matrix */ 2310 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2311 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2312 #if !defined(PETSC_USE_COMPLEX) 2313 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2314 #else 2315 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2316 #endif 2317 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2318 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2319 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2320 j = 0; 2321 while (j < temp_constraints && singular_vals[j] < tol) j++; 2322 total_counts = total_counts-j; 2323 valid_constraints = temp_constraints-j; 2324 /* scale and copy POD basis into used quadrature memory */ 2325 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2326 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2327 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2331 if (j<temp_constraints) { 2332 PetscInt ii; 2333 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2334 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2335 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2336 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2337 for (k=0;k<temp_constraints-j;k++) { 2338 for (ii=0;ii<size_of_constraint;ii++) { 2339 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2340 } 2341 } 2342 } 2343 #else /* on missing GESVD */ 2344 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2345 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2346 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2347 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2348 #if !defined(PETSC_USE_COMPLEX) 2349 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2350 #else 2351 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2352 #endif 2353 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2354 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2355 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2356 k = temp_constraints; 2357 if (k > size_of_constraint) k = size_of_constraint; 2358 j = 0; 2359 while (j < k && singular_vals[k-j-1] < tol) j++; 2360 valid_constraints = k-j; 2361 total_counts = total_counts-temp_constraints+valid_constraints; 2362 #endif /* on missing GESVD */ 2363 } 2364 } 2365 /* update pointers information */ 2366 if (valid_constraints) { 2367 constraints_n[total_counts_cc] = valid_constraints; 2368 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2369 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2370 /* set change_of_basis flag */ 2371 if (boolforchange) { 2372 PetscBTSet(change_basis,total_counts_cc); 2373 } 2374 total_counts_cc++; 2375 } 2376 } 2377 /* free workspace */ 2378 if (!skip_lapack) { 2379 ierr = PetscFree(work);CHKERRQ(ierr); 2380 #if defined(PETSC_USE_COMPLEX) 2381 ierr = PetscFree(rwork);CHKERRQ(ierr); 2382 #endif 2383 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2384 #if defined(PETSC_MISSING_LAPACK_GESVD) 2385 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2386 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2387 #endif 2388 } 2389 for (k=0;k<nnsp_size;k++) { 2390 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2391 } 2392 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2393 /* free index sets of faces, edges and vertices */ 2394 for (i=0;i<n_ISForFaces;i++) { 2395 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2396 } 2397 if (n_ISForFaces) { 2398 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2399 } 2400 for (i=0;i<n_ISForEdges;i++) { 2401 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2402 } 2403 if (n_ISForEdges) { 2404 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2405 } 2406 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2407 } else { 2408 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2409 2410 total_counts = 0; 2411 n_vertices = 0; 2412 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2413 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2414 } 2415 max_constraints = 0; 2416 total_counts_cc = 0; 2417 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2418 total_counts += pcbddc->adaptive_constraints_n[i]; 2419 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2420 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2421 } 2422 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2423 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2424 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2425 constraints_data = pcbddc->adaptive_constraints_data; 2426 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2427 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2428 total_counts_cc = 0; 2429 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2430 if (pcbddc->adaptive_constraints_n[i]) { 2431 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2432 } 2433 } 2434 #if 0 2435 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2436 for (i=0;i<total_counts_cc;i++) { 2437 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2438 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2439 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2440 printf(" %d",constraints_idxs[j]); 2441 } 2442 printf("\n"); 2443 printf("number of cc: %d\n",constraints_n[i]); 2444 } 2445 for (i=0;i<n_vertices;i++) { 2446 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2447 } 2448 for (i=0;i<sub_schurs->n_subs;i++) { 2449 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2450 } 2451 #endif 2452 2453 max_size_of_constraint = 0; 2454 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2455 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2456 /* Change of basis */ 2457 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2458 if (pcbddc->use_change_of_basis) { 2459 for (i=0;i<sub_schurs->n_subs;i++) { 2460 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2461 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2462 } 2463 } 2464 } 2465 } 2466 pcbddc->local_primal_size = total_counts; 2467 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2468 2469 /* map constraints_idxs in boundary numbering */ 2470 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2471 if (i != constraints_idxs_ptr[total_counts_cc]) { 2472 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2473 } 2474 2475 /* Create constraint matrix */ 2476 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2477 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2478 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2479 2480 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2481 /* determine if a QR strategy is needed for change of basis */ 2482 qr_needed = PETSC_FALSE; 2483 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2484 total_primal_vertices=0; 2485 pcbddc->local_primal_size_cc = 0; 2486 for (i=0;i<total_counts_cc;i++) { 2487 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2488 if (size_of_constraint == 1) { 2489 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2490 pcbddc->local_primal_size_cc += 1; 2491 } else if (PetscBTLookup(change_basis,i)) { 2492 for (k=0;k<constraints_n[i];k++) { 2493 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2494 } 2495 pcbddc->local_primal_size_cc += constraints_n[i]; 2496 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2497 PetscBTSet(qr_needed_idx,i); 2498 qr_needed = PETSC_TRUE; 2499 } 2500 } else { 2501 pcbddc->local_primal_size_cc += 1; 2502 } 2503 } 2504 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2505 pcbddc->n_vertices = total_primal_vertices; 2506 /* permute indices in order to have a sorted set of vertices */ 2507 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2508 2509 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2510 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2511 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2512 2513 /* nonzero structure of constraint matrix */ 2514 /* and get reference dof for local constraints */ 2515 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2516 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2517 2518 j = total_primal_vertices; 2519 total_counts = total_primal_vertices; 2520 cum = total_primal_vertices; 2521 for (i=n_vertices;i<total_counts_cc;i++) { 2522 if (!PetscBTLookup(change_basis,i)) { 2523 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2524 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2525 cum++; 2526 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2527 for (k=0;k<constraints_n[i];k++) { 2528 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2529 nnz[j+k] = size_of_constraint; 2530 } 2531 j += constraints_n[i]; 2532 } 2533 } 2534 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2535 ierr = PetscFree(nnz);CHKERRQ(ierr); 2536 2537 /* set values in constraint matrix */ 2538 for (i=0;i<total_primal_vertices;i++) { 2539 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2540 } 2541 total_counts = total_primal_vertices; 2542 for (i=n_vertices;i<total_counts_cc;i++) { 2543 if (!PetscBTLookup(change_basis,i)) { 2544 PetscInt *cols; 2545 2546 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2547 cols = constraints_idxs+constraints_idxs_ptr[i]; 2548 for (k=0;k<constraints_n[i];k++) { 2549 PetscInt row = total_counts+k; 2550 PetscScalar *vals; 2551 2552 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2553 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2554 } 2555 total_counts += constraints_n[i]; 2556 } 2557 } 2558 /* assembling */ 2559 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2560 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2561 2562 /* 2563 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2564 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2565 */ 2566 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2567 if (pcbddc->use_change_of_basis) { 2568 /* dual and primal dofs on a single cc */ 2569 PetscInt dual_dofs,primal_dofs; 2570 /* working stuff for GEQRF */ 2571 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2572 PetscBLASInt lqr_work; 2573 /* working stuff for UNGQR */ 2574 PetscScalar *gqr_work,lgqr_work_t; 2575 PetscBLASInt lgqr_work; 2576 /* working stuff for TRTRS */ 2577 PetscScalar *trs_rhs; 2578 PetscBLASInt Blas_NRHS; 2579 /* pointers for values insertion into change of basis matrix */ 2580 PetscInt *start_rows,*start_cols; 2581 PetscScalar *start_vals; 2582 /* working stuff for values insertion */ 2583 PetscBT is_primal; 2584 PetscInt *aux_primal_numbering_B; 2585 /* matrix sizes */ 2586 PetscInt global_size,local_size; 2587 /* temporary change of basis */ 2588 Mat localChangeOfBasisMatrix; 2589 /* extra space for debugging */ 2590 PetscScalar *dbg_work; 2591 2592 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2593 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2594 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2595 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2596 /* nonzeros for local mat */ 2597 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2598 for (i=0;i<pcis->n;i++) nnz[i]=1; 2599 for (i=n_vertices;i<total_counts_cc;i++) { 2600 if (PetscBTLookup(change_basis,i)) { 2601 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2602 if (PetscBTLookup(qr_needed_idx,i)) { 2603 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2604 } else { 2605 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2606 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2607 } 2608 } 2609 } 2610 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2611 ierr = PetscFree(nnz);CHKERRQ(ierr); 2612 /* Set initial identity in the matrix */ 2613 for (i=0;i<pcis->n;i++) { 2614 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2615 } 2616 2617 if (pcbddc->dbg_flag) { 2618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2620 } 2621 2622 2623 /* Now we loop on the constraints which need a change of basis */ 2624 /* 2625 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2626 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2627 2628 Basic blocks of change of basis matrix T computed by 2629 2630 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2631 2632 | 1 0 ... 0 s_1/S | 2633 | 0 1 ... 0 s_2/S | 2634 | ... | 2635 | 0 ... 1 s_{n-1}/S | 2636 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2637 2638 with S = \sum_{i=1}^n s_i^2 2639 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2640 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2641 2642 - QR decomposition of constraints otherwise 2643 */ 2644 if (qr_needed) { 2645 /* space to store Q */ 2646 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2647 /* first we issue queries for optimal work */ 2648 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2649 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2650 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2651 lqr_work = -1; 2652 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2653 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2654 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2655 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2656 lgqr_work = -1; 2657 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2658 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2659 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2660 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2661 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2662 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2663 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2664 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2665 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2666 /* array to store scaling factors for reflectors */ 2667 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2668 /* array to store rhs and solution of triangular solver */ 2669 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2670 /* allocating workspace for check */ 2671 if (pcbddc->dbg_flag) { 2672 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2673 } 2674 } 2675 /* array to store whether a node is primal or not */ 2676 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2677 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2678 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2679 if (i != total_primal_vertices) { 2680 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2681 } 2682 for (i=0;i<total_primal_vertices;i++) { 2683 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2684 } 2685 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2686 2687 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2688 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2689 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2690 if (PetscBTLookup(change_basis,total_counts)) { 2691 /* get constraint info */ 2692 primal_dofs = constraints_n[total_counts]; 2693 dual_dofs = size_of_constraint-primal_dofs; 2694 2695 if (pcbddc->dbg_flag) { 2696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2697 } 2698 2699 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2700 2701 /* copy quadrature constraints for change of basis check */ 2702 if (pcbddc->dbg_flag) { 2703 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2704 } 2705 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2706 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2707 2708 /* compute QR decomposition of constraints */ 2709 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2711 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2712 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2713 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2714 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2715 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2716 2717 /* explictly compute R^-T */ 2718 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2719 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2720 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2721 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2724 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2725 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2726 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2727 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2728 2729 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2733 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2734 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2735 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2736 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2737 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2738 2739 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2740 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2741 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2742 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2743 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2744 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2745 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2747 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2748 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2749 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2750 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2751 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2752 2753 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2754 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2755 /* insert cols for primal dofs */ 2756 for (j=0;j<primal_dofs;j++) { 2757 start_vals = &qr_basis[j*size_of_constraint]; 2758 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2759 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2760 } 2761 /* insert cols for dual dofs */ 2762 for (j=0,k=0;j<dual_dofs;k++) { 2763 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2764 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2765 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2766 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2767 j++; 2768 } 2769 } 2770 2771 /* check change of basis */ 2772 if (pcbddc->dbg_flag) { 2773 PetscInt ii,jj; 2774 PetscBool valid_qr=PETSC_TRUE; 2775 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2776 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2777 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2778 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2780 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2781 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2782 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2783 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2784 for (jj=0;jj<size_of_constraint;jj++) { 2785 for (ii=0;ii<primal_dofs;ii++) { 2786 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2787 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2788 } 2789 } 2790 if (!valid_qr) { 2791 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2792 for (jj=0;jj<size_of_constraint;jj++) { 2793 for (ii=0;ii<primal_dofs;ii++) { 2794 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2795 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2796 } 2797 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2798 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2799 } 2800 } 2801 } 2802 } else { 2803 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2804 } 2805 } 2806 } else { /* simple transformation block */ 2807 PetscInt row,col; 2808 PetscScalar val,norm; 2809 2810 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2811 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2812 for (j=0;j<size_of_constraint;j++) { 2813 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2814 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2815 if (!PetscBTLookup(is_primal,row_B)) { 2816 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2817 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2818 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2819 } else { 2820 for (k=0;k<size_of_constraint;k++) { 2821 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2822 if (row != col) { 2823 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2824 } else { 2825 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2826 } 2827 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2828 } 2829 } 2830 } 2831 if (pcbddc->dbg_flag) { 2832 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2833 } 2834 } 2835 } else { 2836 if (pcbddc->dbg_flag) { 2837 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2838 } 2839 } 2840 } 2841 2842 /* free workspace */ 2843 if (qr_needed) { 2844 if (pcbddc->dbg_flag) { 2845 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2846 } 2847 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2848 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2849 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2850 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2851 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2852 } 2853 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2854 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2856 2857 /* assembling of global change of variable */ 2858 { 2859 Mat tmat; 2860 PetscInt bs; 2861 2862 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2863 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2864 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2865 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2866 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2867 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2868 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2869 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2870 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2871 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2872 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2873 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2874 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2875 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2876 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2877 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2878 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2879 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2880 } 2881 /* check */ 2882 if (pcbddc->dbg_flag) { 2883 PetscReal error; 2884 Vec x,x_change; 2885 2886 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2887 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2888 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2889 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2890 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2891 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2892 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2893 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2894 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2895 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2896 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2897 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2898 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2899 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2900 ierr = VecDestroy(&x);CHKERRQ(ierr); 2901 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2902 } 2903 2904 /* adapt sub_schurs computed (if any) */ 2905 if (pcbddc->use_deluxe_scaling) { 2906 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2907 if (sub_schurs->S_Ej_all) { 2908 Mat S_new,tmat; 2909 IS is_all_N; 2910 2911 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2912 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2913 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2914 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2915 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2916 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2917 sub_schurs->S_Ej_all = S_new; 2918 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2919 if (sub_schurs->sum_S_Ej_all) { 2920 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2921 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2922 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2923 sub_schurs->sum_S_Ej_all = S_new; 2924 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2925 } 2926 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2927 } 2928 } 2929 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2930 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2931 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2932 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2933 } 2934 2935 /* set up change of basis context */ 2936 if (pcbddc->ChangeOfBasisMatrix) { 2937 PCBDDCChange_ctx change_ctx; 2938 2939 if (!pcbddc->new_global_mat) { 2940 PetscInt global_size,local_size; 2941 2942 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2943 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2944 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2945 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2946 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2947 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2948 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2949 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2950 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2951 } else { 2952 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2953 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2954 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2955 } 2956 if (!pcbddc->user_ChangeOfBasisMatrix) { 2957 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2958 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2959 } else { 2960 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2961 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2962 } 2963 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2964 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2965 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2967 } 2968 2969 /* check if a new primal space has been introduced */ 2970 pcbddc->new_primal_space_local = PETSC_TRUE; 2971 if (olocal_primal_size == pcbddc->local_primal_size) { 2972 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2973 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2974 if (!pcbddc->new_primal_space_local) { 2975 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2976 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2977 } 2978 } 2979 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2980 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2981 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2982 2983 /* flush dbg viewer */ 2984 if (pcbddc->dbg_flag) { 2985 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2986 } 2987 2988 /* free workspace */ 2989 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2990 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2991 if (!pcbddc->adaptive_selection) { 2992 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2993 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2994 } else { 2995 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2996 pcbddc->adaptive_constraints_idxs_ptr, 2997 pcbddc->adaptive_constraints_data_ptr, 2998 pcbddc->adaptive_constraints_idxs, 2999 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3000 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3001 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3002 } 3003 PetscFunctionReturn(0); 3004 } 3005 3006 #undef __FUNCT__ 3007 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3008 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3009 { 3010 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3011 PC_IS *pcis = (PC_IS*)pc->data; 3012 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3013 PetscInt ierr,i,vertex_size,N; 3014 PetscViewer viewer=pcbddc->dbg_viewer; 3015 3016 PetscFunctionBegin; 3017 /* Reset previously computed graph */ 3018 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3019 /* Init local Graph struct */ 3020 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3021 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3022 3023 /* Check validity of the csr graph passed in by the user */ 3024 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3025 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3026 } 3027 3028 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3029 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3030 PetscInt *xadj,*adjncy; 3031 PetscInt nvtxs; 3032 PetscBool flg_row=PETSC_FALSE; 3033 3034 if (pcbddc->use_local_adj) { 3035 3036 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3037 if (flg_row) { 3038 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3039 pcbddc->computed_rowadj = PETSC_TRUE; 3040 } 3041 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3042 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3043 IS is_dummy; 3044 ISLocalToGlobalMapping l2gmap_dummy; 3045 PetscInt j,sum; 3046 PetscInt *cxadj,*cadjncy; 3047 const PetscInt *idxs; 3048 PCBDDCGraph graph; 3049 PetscBT is_on_boundary; 3050 3051 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3052 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3053 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3054 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3055 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3056 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3057 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3058 if (flg_row) { 3059 graph->xadj = xadj; 3060 graph->adjncy = adjncy; 3061 } 3062 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3063 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3064 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3065 3066 if (pcbddc->dbg_flag) { 3067 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3068 for (i=0;i<graph->ncc;i++) { 3069 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3070 } 3071 } 3072 3073 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3074 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3075 for (i=0;i<pcis->n_B;i++) { 3076 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3077 } 3078 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3079 3080 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3081 sum = 0; 3082 for (i=0;i<graph->ncc;i++) { 3083 PetscInt sizecc = 0; 3084 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3085 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3086 sizecc++; 3087 } 3088 } 3089 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3090 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3091 cxadj[graph->queue[j]] = sizecc; 3092 } 3093 } 3094 sum += sizecc*sizecc; 3095 } 3096 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3097 sum = 0; 3098 for (i=0;i<pcis->n;i++) { 3099 PetscInt temp = cxadj[i]; 3100 cxadj[i] = sum; 3101 sum += temp; 3102 } 3103 cxadj[pcis->n] = sum; 3104 for (i=0;i<graph->ncc;i++) { 3105 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3106 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3107 PetscInt k,sizecc = 0; 3108 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3109 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3110 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3111 sizecc++; 3112 } 3113 } 3114 } 3115 } 3116 } 3117 if (sum) { 3118 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3119 } else { 3120 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3121 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3122 } 3123 graph->xadj = 0; 3124 graph->adjncy = 0; 3125 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3126 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3127 } 3128 } 3129 if (pcbddc->dbg_flag) { 3130 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3131 } 3132 3133 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3134 vertex_size = 1; 3135 if (pcbddc->user_provided_isfordofs) { 3136 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3137 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3138 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3139 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3140 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3141 } 3142 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3143 pcbddc->n_ISForDofs = 0; 3144 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3145 } 3146 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3147 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3148 } else { 3149 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3150 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3151 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3152 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3153 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3154 } 3155 } 3156 } 3157 3158 /* Setup of Graph */ 3159 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3160 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3161 } 3162 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3163 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3164 } 3165 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3166 3167 /* Graph's connected components analysis */ 3168 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3169 3170 /* print some info to stdout */ 3171 if (pcbddc->dbg_flag) { 3172 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3173 } 3174 3175 /* mark topography has done */ 3176 pcbddc->recompute_topography = PETSC_FALSE; 3177 PetscFunctionReturn(0); 3178 } 3179 3180 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3181 #undef __FUNCT__ 3182 #define __FUNCT__ "PCBDDCSubsetNumbering" 3183 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3184 { 3185 PetscSF sf; 3186 PetscLayout map; 3187 const PetscInt *idxs; 3188 PetscInt *leaf_data,*root_data,*gidxs; 3189 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3190 PetscInt n_n,nlocals,start,first_index; 3191 PetscMPIInt commsize; 3192 PetscBool first_found; 3193 PetscErrorCode ierr; 3194 3195 PetscFunctionBegin; 3196 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3197 if (subset_mult) { 3198 PetscCheckSameComm(subset,1,subset_mult,2); 3199 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3200 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3201 } 3202 /* create workspace layout for computing global indices of subset */ 3203 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3204 lbounds[0] = lbounds[1] = 0; 3205 for (i=0;i<n;i++) { 3206 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3207 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3208 } 3209 lbounds[0] = -lbounds[0]; 3210 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3211 gbounds[0] = -gbounds[0]; 3212 N = gbounds[1] - gbounds[0] + 1; 3213 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3214 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3215 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3216 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3217 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3218 3219 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3220 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3221 if (subset_mult) { 3222 const PetscInt* idxs_mult; 3223 3224 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3225 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3226 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3227 } else { 3228 for (i=0;i<n;i++) leaf_data[i] = 1; 3229 } 3230 /* local size of new subset */ 3231 n_n = 0; 3232 for (i=0;i<n;i++) n_n += leaf_data[i]; 3233 3234 /* global indexes in layout */ 3235 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3236 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3237 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3238 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3239 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3240 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3241 3242 /* reduce from leaves to roots */ 3243 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3244 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3245 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3246 3247 /* count indexes in local part of layout */ 3248 nlocals = 0; 3249 first_index = -1; 3250 first_found = PETSC_FALSE; 3251 for (i=0;i<Nl;i++) { 3252 if (!first_found && root_data[i]) { 3253 first_found = PETSC_TRUE; 3254 first_index = i; 3255 } 3256 nlocals += root_data[i]; 3257 } 3258 3259 /* cumulative of number of indexes and size of subset without holes */ 3260 start = 0; 3261 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3262 if (N_n) { /* compute total size of new subset if requested */ 3263 *N_n = start + nlocals; 3264 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3265 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3266 } 3267 /* adapt roof data with cumulative */ 3268 if (first_found) { 3269 PetscInt old_index; 3270 3271 root_data[first_index] += start; 3272 old_index = first_index; 3273 for (i=first_index+1;i<Nl;i++) { 3274 if (root_data[i]) { 3275 root_data[i] += root_data[old_index]; 3276 old_index = i; 3277 } 3278 } 3279 } 3280 3281 /* from roots to leaves */ 3282 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3283 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3284 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3285 3286 /* create new IS with global indexes without holes */ 3287 if (subset_mult) { 3288 const PetscInt* idxs_mult; 3289 PetscInt cum; 3290 3291 cum = 0; 3292 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3293 for (i=0;i<n;i++) { 3294 PetscInt j; 3295 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3296 } 3297 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3298 } else { 3299 for (i=0;i<n;i++) { 3300 gidxs[i] = leaf_data[i]-1; 3301 } 3302 } 3303 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3304 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3305 PetscFunctionReturn(0); 3306 } 3307 3308 #undef __FUNCT__ 3309 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3310 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3311 { 3312 PetscInt i,j; 3313 PetscScalar *alphas; 3314 PetscErrorCode ierr; 3315 3316 PetscFunctionBegin; 3317 /* this implements stabilized Gram-Schmidt */ 3318 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3319 for (i=0;i<n;i++) { 3320 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3321 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3322 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3323 } 3324 ierr = PetscFree(alphas);CHKERRQ(ierr); 3325 PetscFunctionReturn(0); 3326 } 3327 3328 #undef __FUNCT__ 3329 #define __FUNCT__ "MatISGetSubassemblingPattern" 3330 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3331 { 3332 Mat subdomain_adj; 3333 IS new_ranks,ranks_send_to; 3334 MatPartitioning partitioner; 3335 Mat_IS *matis; 3336 PetscInt n_neighs,*neighs,*n_shared,**shared; 3337 PetscInt prank; 3338 PetscMPIInt size,rank,color; 3339 PetscInt *xadj,*adjncy,*oldranks; 3340 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3341 PetscInt i,local_size,threshold=0; 3342 PetscErrorCode ierr; 3343 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3344 PetscSubcomm subcomm; 3345 3346 PetscFunctionBegin; 3347 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3348 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3349 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3350 3351 /* Get info on mapping */ 3352 matis = (Mat_IS*)(mat->data); 3353 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3354 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3355 3356 /* build local CSR graph of subdomains' connectivity */ 3357 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3358 xadj[0] = 0; 3359 xadj[1] = PetscMax(n_neighs-1,0); 3360 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3361 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3362 3363 if (threshold) { 3364 PetscInt xadj_count = 0; 3365 for (i=1;i<n_neighs;i++) { 3366 if (n_shared[i] > threshold) { 3367 adjncy[xadj_count] = neighs[i]; 3368 adjncy_wgt[xadj_count] = n_shared[i]; 3369 xadj_count++; 3370 } 3371 } 3372 xadj[1] = xadj_count; 3373 } else { 3374 if (xadj[1]) { 3375 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3376 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3377 } 3378 } 3379 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3380 if (use_square) { 3381 for (i=0;i<xadj[1];i++) { 3382 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3383 } 3384 } 3385 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3386 3387 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3388 3389 /* 3390 Restrict work on active processes only. 3391 */ 3392 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3393 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3394 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3395 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3396 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3397 if (color) { 3398 ierr = PetscFree(xadj);CHKERRQ(ierr); 3399 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3400 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3401 } else { 3402 PetscInt coarsening_ratio; 3403 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3404 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3405 prank = rank; 3406 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3407 /* 3408 for (i=0;i<size;i++) { 3409 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3410 } 3411 */ 3412 for (i=0;i<xadj[1];i++) { 3413 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3414 } 3415 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3416 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3417 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3418 3419 /* Partition */ 3420 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3421 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3422 if (use_vwgt) { 3423 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3424 v_wgt[0] = local_size; 3425 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3426 } 3427 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3428 coarsening_ratio = size/n_subdomains; 3429 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3430 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3431 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3432 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3433 3434 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3435 if (contiguous) { 3436 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3437 } else { 3438 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3439 } 3440 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3441 /* clean up */ 3442 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3443 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3444 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3445 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3446 } 3447 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3448 3449 /* assemble parallel IS for sends */ 3450 i = 1; 3451 if (color) i=0; 3452 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3453 3454 /* get back IS */ 3455 *is_sends = ranks_send_to; 3456 PetscFunctionReturn(0); 3457 } 3458 3459 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3460 3461 #undef __FUNCT__ 3462 #define __FUNCT__ "MatISSubassemble" 3463 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3464 { 3465 Mat local_mat; 3466 Mat_IS *matis; 3467 IS is_sends_internal; 3468 PetscInt rows,cols,new_local_rows; 3469 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3470 PetscBool ismatis,isdense,newisdense,destroy_mat; 3471 ISLocalToGlobalMapping l2gmap; 3472 PetscInt* l2gmap_indices; 3473 const PetscInt* is_indices; 3474 MatType new_local_type; 3475 /* buffers */ 3476 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3477 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3478 PetscInt *recv_buffer_idxs_local; 3479 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3480 /* MPI */ 3481 MPI_Comm comm,comm_n; 3482 PetscSubcomm subcomm; 3483 PetscMPIInt n_sends,n_recvs,commsize; 3484 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3485 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3486 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3487 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3488 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3489 PetscErrorCode ierr; 3490 3491 PetscFunctionBegin; 3492 /* TODO: add missing checks */ 3493 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3494 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3495 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3496 PetscValidLogicalCollectiveInt(mat,nis,7); 3497 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3498 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3499 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3500 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3501 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3502 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3503 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3504 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3505 PetscInt mrows,mcols,mnrows,mncols; 3506 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3507 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3508 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3509 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3510 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3511 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3512 } 3513 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3514 PetscValidLogicalCollectiveInt(mat,bs,0); 3515 /* prepare IS for sending if not provided */ 3516 if (!is_sends) { 3517 PetscBool pcontig = PETSC_TRUE; 3518 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3519 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3520 } else { 3521 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3522 is_sends_internal = is_sends; 3523 } 3524 3525 /* get pointer of MATIS data */ 3526 matis = (Mat_IS*)mat->data; 3527 3528 /* get comm */ 3529 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3530 3531 /* compute number of sends */ 3532 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3533 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3534 3535 /* compute number of receives */ 3536 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3537 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3538 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3539 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3540 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3541 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3542 ierr = PetscFree(iflags);CHKERRQ(ierr); 3543 3544 /* restrict comm if requested */ 3545 subcomm = 0; 3546 destroy_mat = PETSC_FALSE; 3547 if (restrict_comm) { 3548 PetscMPIInt color,subcommsize; 3549 3550 color = 0; 3551 if (restrict_full) { 3552 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3553 } else { 3554 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3555 } 3556 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3557 subcommsize = commsize - subcommsize; 3558 /* check if reuse has been requested */ 3559 if (reuse == MAT_REUSE_MATRIX) { 3560 if (*mat_n) { 3561 PetscMPIInt subcommsize2; 3562 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3563 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3564 comm_n = PetscObjectComm((PetscObject)*mat_n); 3565 } else { 3566 comm_n = PETSC_COMM_SELF; 3567 } 3568 } else { /* MAT_INITIAL_MATRIX */ 3569 PetscMPIInt rank; 3570 3571 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3572 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3573 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3574 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3575 comm_n = PetscSubcommChild(subcomm); 3576 } 3577 /* flag to destroy *mat_n if not significative */ 3578 if (color) destroy_mat = PETSC_TRUE; 3579 } else { 3580 comm_n = comm; 3581 } 3582 3583 /* prepare send/receive buffers */ 3584 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3585 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3586 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3587 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3588 if (nis) { 3589 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3590 } 3591 3592 /* Get data from local matrices */ 3593 if (!isdense) { 3594 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3595 /* TODO: See below some guidelines on how to prepare the local buffers */ 3596 /* 3597 send_buffer_vals should contain the raw values of the local matrix 3598 send_buffer_idxs should contain: 3599 - MatType_PRIVATE type 3600 - PetscInt size_of_l2gmap 3601 - PetscInt global_row_indices[size_of_l2gmap] 3602 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3603 */ 3604 } else { 3605 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3606 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3607 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3608 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3609 send_buffer_idxs[1] = i; 3610 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3611 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3612 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3613 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3614 for (i=0;i<n_sends;i++) { 3615 ilengths_vals[is_indices[i]] = len*len; 3616 ilengths_idxs[is_indices[i]] = len+2; 3617 } 3618 } 3619 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3620 /* additional is (if any) */ 3621 if (nis) { 3622 PetscMPIInt psum; 3623 PetscInt j; 3624 for (j=0,psum=0;j<nis;j++) { 3625 PetscInt plen; 3626 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3627 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3628 psum += len+1; /* indices + lenght */ 3629 } 3630 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3631 for (j=0,psum=0;j<nis;j++) { 3632 PetscInt plen; 3633 const PetscInt *is_array_idxs; 3634 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3635 send_buffer_idxs_is[psum] = plen; 3636 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3637 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3638 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3639 psum += plen+1; /* indices + lenght */ 3640 } 3641 for (i=0;i<n_sends;i++) { 3642 ilengths_idxs_is[is_indices[i]] = psum; 3643 } 3644 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3645 } 3646 3647 buf_size_idxs = 0; 3648 buf_size_vals = 0; 3649 buf_size_idxs_is = 0; 3650 for (i=0;i<n_recvs;i++) { 3651 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3652 buf_size_vals += (PetscInt)olengths_vals[i]; 3653 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3654 } 3655 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3656 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3657 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3658 3659 /* get new tags for clean communications */ 3660 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3661 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3662 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3663 3664 /* allocate for requests */ 3665 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3666 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3667 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3668 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3669 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3670 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3671 3672 /* communications */ 3673 ptr_idxs = recv_buffer_idxs; 3674 ptr_vals = recv_buffer_vals; 3675 ptr_idxs_is = recv_buffer_idxs_is; 3676 for (i=0;i<n_recvs;i++) { 3677 source_dest = onodes[i]; 3678 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3679 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3680 ptr_idxs += olengths_idxs[i]; 3681 ptr_vals += olengths_vals[i]; 3682 if (nis) { 3683 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3684 ptr_idxs_is += olengths_idxs_is[i]; 3685 } 3686 } 3687 for (i=0;i<n_sends;i++) { 3688 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3689 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3690 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3691 if (nis) { 3692 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3693 } 3694 } 3695 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3696 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3697 3698 /* assemble new l2g map */ 3699 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3700 ptr_idxs = recv_buffer_idxs; 3701 new_local_rows = 0; 3702 for (i=0;i<n_recvs;i++) { 3703 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3704 ptr_idxs += olengths_idxs[i]; 3705 } 3706 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3707 ptr_idxs = recv_buffer_idxs; 3708 new_local_rows = 0; 3709 for (i=0;i<n_recvs;i++) { 3710 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3711 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3712 ptr_idxs += olengths_idxs[i]; 3713 } 3714 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3715 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3716 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3717 3718 /* infer new local matrix type from received local matrices type */ 3719 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3720 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3721 if (n_recvs) { 3722 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3723 ptr_idxs = recv_buffer_idxs; 3724 for (i=0;i<n_recvs;i++) { 3725 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3726 new_local_type_private = MATAIJ_PRIVATE; 3727 break; 3728 } 3729 ptr_idxs += olengths_idxs[i]; 3730 } 3731 switch (new_local_type_private) { 3732 case MATDENSE_PRIVATE: 3733 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3734 new_local_type = MATSEQAIJ; 3735 bs = 1; 3736 } else { /* if I receive only 1 dense matrix */ 3737 new_local_type = MATSEQDENSE; 3738 bs = 1; 3739 } 3740 break; 3741 case MATAIJ_PRIVATE: 3742 new_local_type = MATSEQAIJ; 3743 bs = 1; 3744 break; 3745 case MATBAIJ_PRIVATE: 3746 new_local_type = MATSEQBAIJ; 3747 break; 3748 case MATSBAIJ_PRIVATE: 3749 new_local_type = MATSEQSBAIJ; 3750 break; 3751 default: 3752 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3753 break; 3754 } 3755 } else { /* by default, new_local_type is seqdense */ 3756 new_local_type = MATSEQDENSE; 3757 bs = 1; 3758 } 3759 3760 /* create MATIS object if needed */ 3761 if (reuse == MAT_INITIAL_MATRIX) { 3762 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3763 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3764 } else { 3765 /* it also destroys the local matrices */ 3766 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3767 } 3768 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3769 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3770 3771 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3772 3773 /* Global to local map of received indices */ 3774 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3775 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3776 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3777 3778 /* restore attributes -> type of incoming data and its size */ 3779 buf_size_idxs = 0; 3780 for (i=0;i<n_recvs;i++) { 3781 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3782 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3783 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3784 } 3785 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3786 3787 /* set preallocation */ 3788 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3789 if (!newisdense) { 3790 PetscInt *new_local_nnz=0; 3791 3792 ptr_vals = recv_buffer_vals; 3793 ptr_idxs = recv_buffer_idxs_local; 3794 if (n_recvs) { 3795 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3796 } 3797 for (i=0;i<n_recvs;i++) { 3798 PetscInt j; 3799 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3800 for (j=0;j<*(ptr_idxs+1);j++) { 3801 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3802 } 3803 } else { 3804 /* TODO */ 3805 } 3806 ptr_idxs += olengths_idxs[i]; 3807 } 3808 if (new_local_nnz) { 3809 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3810 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3811 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3812 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3813 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3814 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3815 } else { 3816 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3817 } 3818 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3819 } else { 3820 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3821 } 3822 3823 /* set values */ 3824 ptr_vals = recv_buffer_vals; 3825 ptr_idxs = recv_buffer_idxs_local; 3826 for (i=0;i<n_recvs;i++) { 3827 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3828 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3829 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3830 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3831 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3832 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3833 } else { 3834 /* TODO */ 3835 } 3836 ptr_idxs += olengths_idxs[i]; 3837 ptr_vals += olengths_vals[i]; 3838 } 3839 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3840 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3841 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3842 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3843 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3844 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3845 3846 #if 0 3847 if (!restrict_comm) { /* check */ 3848 Vec lvec,rvec; 3849 PetscReal infty_error; 3850 3851 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3852 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3853 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3854 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3855 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3856 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3857 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3858 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3859 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3860 } 3861 #endif 3862 3863 /* assemble new additional is (if any) */ 3864 if (nis) { 3865 PetscInt **temp_idxs,*count_is,j,psum; 3866 3867 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3868 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3869 ptr_idxs = recv_buffer_idxs_is; 3870 psum = 0; 3871 for (i=0;i<n_recvs;i++) { 3872 for (j=0;j<nis;j++) { 3873 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3874 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3875 psum += plen; 3876 ptr_idxs += plen+1; /* shift pointer to received data */ 3877 } 3878 } 3879 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3880 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3881 for (i=1;i<nis;i++) { 3882 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3883 } 3884 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3885 ptr_idxs = recv_buffer_idxs_is; 3886 for (i=0;i<n_recvs;i++) { 3887 for (j=0;j<nis;j++) { 3888 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3889 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3890 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3891 ptr_idxs += plen+1; /* shift pointer to received data */ 3892 } 3893 } 3894 for (i=0;i<nis;i++) { 3895 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3896 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3897 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3898 } 3899 ierr = PetscFree(count_is);CHKERRQ(ierr); 3900 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3901 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3902 } 3903 /* free workspace */ 3904 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3905 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3906 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3907 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3908 if (isdense) { 3909 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3910 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3911 } else { 3912 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3913 } 3914 if (nis) { 3915 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3916 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3917 } 3918 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3919 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3920 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3921 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3922 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3923 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3924 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3925 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3926 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3927 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3928 ierr = PetscFree(onodes);CHKERRQ(ierr); 3929 if (nis) { 3930 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3931 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3932 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3933 } 3934 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3935 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3936 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3937 for (i=0;i<nis;i++) { 3938 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3939 } 3940 *mat_n = NULL; 3941 } 3942 PetscFunctionReturn(0); 3943 } 3944 3945 /* temporary hack into ksp private data structure */ 3946 #include <petsc/private/kspimpl.h> 3947 3948 #undef __FUNCT__ 3949 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3950 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3951 { 3952 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3953 PC_IS *pcis = (PC_IS*)pc->data; 3954 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3955 MatNullSpace CoarseNullSpace=NULL; 3956 ISLocalToGlobalMapping coarse_islg; 3957 IS coarse_is,*isarray; 3958 PetscInt i,im_active=-1,active_procs=-1; 3959 PetscInt nis,nisdofs,nisneu; 3960 PC pc_temp; 3961 PCType coarse_pc_type; 3962 KSPType coarse_ksp_type; 3963 PetscBool multilevel_requested,multilevel_allowed; 3964 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3965 Mat t_coarse_mat_is; 3966 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3967 PetscMPIInt all_procs; 3968 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3969 PetscBool compute_vecs = PETSC_FALSE; 3970 PetscScalar *array; 3971 PetscErrorCode ierr; 3972 3973 PetscFunctionBegin; 3974 /* Assign global numbering to coarse dofs */ 3975 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3976 PetscInt ocoarse_size; 3977 compute_vecs = PETSC_TRUE; 3978 ocoarse_size = pcbddc->coarse_size; 3979 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3980 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3981 /* see if we can avoid some work */ 3982 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3983 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3984 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3985 coarse_reuse = PETSC_FALSE; 3986 } else { /* we can safely reuse already computed coarse matrix */ 3987 coarse_reuse = PETSC_TRUE; 3988 } 3989 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3990 coarse_reuse = PETSC_FALSE; 3991 } 3992 /* reset any subassembling information */ 3993 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3994 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3995 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3996 coarse_reuse = PETSC_TRUE; 3997 } 3998 3999 /* count "active" (i.e. with positive local size) and "void" processes */ 4000 im_active = !!(pcis->n); 4001 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4002 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4003 void_procs = all_procs-active_procs; 4004 csin_type_simple = PETSC_TRUE; 4005 redist = PETSC_FALSE; 4006 if (pcbddc->current_level && void_procs) { 4007 csin_ml = PETSC_TRUE; 4008 ncoarse_ml = void_procs; 4009 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4010 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4011 csin_ds = PETSC_TRUE; 4012 ncoarse_ds = pcbddc->redistribute_coarse; 4013 redist = PETSC_TRUE; 4014 } else { 4015 csin_ds = PETSC_TRUE; 4016 ncoarse_ds = active_procs; 4017 redist = PETSC_TRUE; 4018 } 4019 } else { 4020 csin_ml = PETSC_FALSE; 4021 ncoarse_ml = all_procs; 4022 if (void_procs) { 4023 csin_ds = PETSC_TRUE; 4024 ncoarse_ds = void_procs; 4025 csin_type_simple = PETSC_FALSE; 4026 } else { 4027 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4028 csin_ds = PETSC_TRUE; 4029 ncoarse_ds = pcbddc->redistribute_coarse; 4030 redist = PETSC_TRUE; 4031 } else { 4032 csin_ds = PETSC_FALSE; 4033 ncoarse_ds = all_procs; 4034 } 4035 } 4036 } 4037 4038 /* 4039 test if we can go multilevel: three conditions must be satisfied: 4040 - we have not exceeded the number of levels requested 4041 - we can actually subassemble the active processes 4042 - we can find a suitable number of MPI processes where we can place the subassembled problem 4043 */ 4044 multilevel_allowed = PETSC_FALSE; 4045 multilevel_requested = PETSC_FALSE; 4046 if (pcbddc->current_level < pcbddc->max_levels) { 4047 multilevel_requested = PETSC_TRUE; 4048 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4049 multilevel_allowed = PETSC_FALSE; 4050 } else { 4051 multilevel_allowed = PETSC_TRUE; 4052 } 4053 } 4054 /* determine number of process partecipating to coarse solver */ 4055 if (multilevel_allowed) { 4056 ncoarse = ncoarse_ml; 4057 csin = csin_ml; 4058 redist = PETSC_FALSE; 4059 } else { 4060 ncoarse = ncoarse_ds; 4061 csin = csin_ds; 4062 } 4063 4064 /* creates temporary l2gmap and IS for coarse indexes */ 4065 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4066 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4067 4068 /* creates temporary MATIS object for coarse matrix */ 4069 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4070 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4071 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4072 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4073 #if 0 4074 { 4075 PetscViewer viewer; 4076 char filename[256]; 4077 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4078 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4079 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4080 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4081 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4082 } 4083 #endif 4084 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4085 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4086 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4087 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4088 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4089 4090 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4091 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4092 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4093 const PetscInt *idxs; 4094 ISLocalToGlobalMapping tmap; 4095 4096 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4097 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4098 /* allocate space for temporary storage */ 4099 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4100 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4101 /* allocate for IS array */ 4102 nisdofs = pcbddc->n_ISForDofsLocal; 4103 nisneu = !!pcbddc->NeumannBoundariesLocal; 4104 nis = nisdofs + nisneu; 4105 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4106 /* dofs splitting */ 4107 for (i=0;i<nisdofs;i++) { 4108 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4109 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4110 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4111 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4112 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4113 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4114 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4115 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4116 } 4117 /* neumann boundaries */ 4118 if (pcbddc->NeumannBoundariesLocal) { 4119 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4120 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4121 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4122 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4123 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4124 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4125 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4126 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4127 } 4128 /* free memory */ 4129 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4130 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4131 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4132 } else { 4133 nis = 0; 4134 nisdofs = 0; 4135 nisneu = 0; 4136 isarray = NULL; 4137 } 4138 /* destroy no longer needed map */ 4139 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4140 4141 /* restrict on coarse candidates (if needed) */ 4142 coarse_mat_is = NULL; 4143 if (csin) { 4144 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4145 if (redist) { 4146 PetscMPIInt rank; 4147 PetscInt spc,n_spc_p1,dest[1],destsize; 4148 4149 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4150 spc = active_procs/ncoarse; 4151 n_spc_p1 = active_procs%ncoarse; 4152 if (im_active) { 4153 destsize = 1; 4154 if (rank > n_spc_p1*(spc+1)-1) { 4155 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4156 } else { 4157 dest[0] = rank/(spc+1); 4158 } 4159 } else { 4160 destsize = 0; 4161 } 4162 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4163 } else if (csin_type_simple) { 4164 PetscMPIInt rank; 4165 PetscInt issize,isidx; 4166 4167 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4168 if (im_active) { 4169 issize = 1; 4170 isidx = (PetscInt)rank; 4171 } else { 4172 issize = 0; 4173 isidx = -1; 4174 } 4175 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4176 } else { /* get a suitable subassembling pattern from MATIS code */ 4177 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4178 } 4179 4180 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4181 if (!redist || ncoarse <= void_procs) { 4182 PetscInt ncoarse_cand,tissize,*nisindices; 4183 PetscInt *coarse_candidates; 4184 const PetscInt* tisindices; 4185 4186 /* get coarse candidates' ranks in pc communicator */ 4187 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4188 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4189 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4190 if (!coarse_candidates[i]) { 4191 coarse_candidates[ncoarse_cand++]=i; 4192 } 4193 } 4194 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4195 4196 4197 if (pcbddc->dbg_flag) { 4198 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4199 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4200 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4201 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4202 for (i=0;i<ncoarse_cand;i++) { 4203 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4204 } 4205 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4206 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4207 } 4208 /* shift the pattern on coarse candidates */ 4209 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4210 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4211 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4212 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4213 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4214 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4215 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4216 } 4217 if (pcbddc->dbg_flag) { 4218 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4219 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4220 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4221 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4222 } 4223 } 4224 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4225 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4226 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4227 } else { /* this is the last level, so use just receiving processes in subcomm */ 4228 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4229 } 4230 } else { 4231 if (pcbddc->dbg_flag) { 4232 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4233 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4234 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4235 } 4236 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4237 coarse_mat_is = t_coarse_mat_is; 4238 } 4239 4240 /* create local to global scatters for coarse problem */ 4241 if (compute_vecs) { 4242 PetscInt lrows; 4243 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4244 if (coarse_mat_is) { 4245 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4246 } else { 4247 lrows = 0; 4248 } 4249 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4250 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4251 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4252 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4253 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4254 } 4255 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4256 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4257 4258 /* set defaults for coarse KSP and PC */ 4259 if (multilevel_allowed) { 4260 coarse_ksp_type = KSPRICHARDSON; 4261 coarse_pc_type = PCBDDC; 4262 } else { 4263 coarse_ksp_type = KSPPREONLY; 4264 coarse_pc_type = PCREDUNDANT; 4265 } 4266 4267 /* print some info if requested */ 4268 if (pcbddc->dbg_flag) { 4269 if (!multilevel_allowed) { 4270 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4271 if (multilevel_requested) { 4272 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4273 } else if (pcbddc->max_levels) { 4274 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4275 } 4276 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4277 } 4278 } 4279 4280 /* create the coarse KSP object only once with defaults */ 4281 if (coarse_mat_is) { 4282 MatReuse coarse_mat_reuse; 4283 PetscViewer dbg_viewer = NULL; 4284 if (pcbddc->dbg_flag) { 4285 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4286 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4287 } 4288 if (!pcbddc->coarse_ksp) { 4289 char prefix[256],str_level[16]; 4290 size_t len; 4291 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4292 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4293 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4294 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4295 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4296 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4297 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4298 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4299 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4300 /* prefix */ 4301 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4302 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4303 if (!pcbddc->current_level) { 4304 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4305 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4306 } else { 4307 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4308 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4309 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4310 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4311 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4312 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4313 } 4314 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4315 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4316 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4317 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4318 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4319 /* allow user customization */ 4320 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4321 } 4322 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4323 if (nisdofs) { 4324 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4325 for (i=0;i<nisdofs;i++) { 4326 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4327 } 4328 } 4329 if (nisneu) { 4330 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4331 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4332 } 4333 4334 /* get some info after set from options */ 4335 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4336 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4337 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4338 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4339 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4340 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4341 isbddc = PETSC_FALSE; 4342 } 4343 if (isredundant) { 4344 KSP inner_ksp; 4345 PC inner_pc; 4346 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4347 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4348 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4349 } 4350 4351 /* assemble coarse matrix */ 4352 if (coarse_reuse) { 4353 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4354 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4355 coarse_mat_reuse = MAT_REUSE_MATRIX; 4356 } else { 4357 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4358 } 4359 if (isbddc || isnn) { 4360 if (pcbddc->coarsening_ratio > 1) { 4361 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4362 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4363 if (pcbddc->dbg_flag) { 4364 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4365 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4366 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4367 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4368 } 4369 } 4370 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4371 } else { 4372 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4373 coarse_mat = coarse_mat_is; 4374 } 4375 } else { 4376 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4377 } 4378 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4379 4380 /* propagate symmetry info of coarse matrix */ 4381 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4382 if (pc->pmat->symmetric_set) { 4383 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4384 } 4385 if (pc->pmat->hermitian_set) { 4386 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4387 } 4388 if (pc->pmat->spd_set) { 4389 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4390 } 4391 /* set operators */ 4392 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4393 if (pcbddc->dbg_flag) { 4394 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4395 } 4396 } else { /* processes non partecipating to coarse solver (if any) */ 4397 coarse_mat = 0; 4398 } 4399 ierr = PetscFree(isarray);CHKERRQ(ierr); 4400 #if 0 4401 { 4402 PetscViewer viewer; 4403 char filename[256]; 4404 sprintf(filename,"coarse_mat.m"); 4405 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4406 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4407 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4408 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4409 } 4410 #endif 4411 4412 /* Compute coarse null space (special handling by BDDC only) */ 4413 if (pcbddc->NullSpace) { 4414 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4415 } 4416 4417 if (pcbddc->coarse_ksp) { 4418 Vec crhs,csol; 4419 PetscBool ispreonly; 4420 if (CoarseNullSpace) { 4421 if (isbddc) { 4422 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4423 } else { 4424 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4425 } 4426 } 4427 /* setup coarse ksp */ 4428 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4429 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4430 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4431 /* hack */ 4432 if (!csol) { 4433 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4434 } 4435 if (!crhs) { 4436 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4437 } 4438 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4439 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4440 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4441 KSP check_ksp; 4442 KSPType check_ksp_type; 4443 PC check_pc; 4444 Vec check_vec,coarse_vec; 4445 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4446 PetscInt its; 4447 PetscBool compute_eigs; 4448 PetscReal *eigs_r,*eigs_c; 4449 PetscInt neigs; 4450 const char *prefix; 4451 4452 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4453 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4454 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4455 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4456 if (ispreonly) { 4457 check_ksp_type = KSPPREONLY; 4458 compute_eigs = PETSC_FALSE; 4459 } else { 4460 check_ksp_type = KSPGMRES; 4461 compute_eigs = PETSC_TRUE; 4462 } 4463 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4464 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4465 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4466 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4467 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4468 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4469 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4470 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4471 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4472 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4473 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4474 /* create random vec */ 4475 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4476 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4477 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4478 if (CoarseNullSpace) { 4479 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4480 } 4481 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4482 /* solve coarse problem */ 4483 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4484 if (CoarseNullSpace) { 4485 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4486 } 4487 /* set eigenvalue estimation if preonly has not been requested */ 4488 if (compute_eigs) { 4489 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4490 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4491 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4492 lambda_max = eigs_r[neigs-1]; 4493 lambda_min = eigs_r[0]; 4494 if (pcbddc->use_coarse_estimates) { 4495 if (lambda_max>lambda_min) { 4496 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4497 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4498 } 4499 } 4500 } 4501 4502 /* check coarse problem residual error */ 4503 if (pcbddc->dbg_flag) { 4504 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4505 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4506 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4507 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4508 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4509 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4510 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4511 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4512 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4513 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4514 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4515 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4516 if (compute_eigs) { 4517 PetscReal lambda_max_s,lambda_min_s; 4518 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4519 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4520 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4521 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4522 for (i=0;i<neigs;i++) { 4523 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4524 } 4525 } 4526 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4527 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4528 } 4529 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4530 if (compute_eigs) { 4531 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4532 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4533 } 4534 } 4535 } 4536 /* print additional info */ 4537 if (pcbddc->dbg_flag) { 4538 /* waits until all processes reaches this point */ 4539 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4540 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4541 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4542 } 4543 4544 /* free memory */ 4545 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4546 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4547 PetscFunctionReturn(0); 4548 } 4549 4550 #undef __FUNCT__ 4551 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4552 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4553 { 4554 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4555 PC_IS* pcis = (PC_IS*)pc->data; 4556 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4557 IS subset,subset_mult,subset_n; 4558 PetscInt local_size,coarse_size=0; 4559 PetscInt *local_primal_indices=NULL; 4560 const PetscInt *t_local_primal_indices; 4561 PetscErrorCode ierr; 4562 4563 PetscFunctionBegin; 4564 /* Compute global number of coarse dofs */ 4565 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4566 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4567 } 4568 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4569 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,subset_n,&subset);CHKERRQ(ierr); 4570 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4571 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4572 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4573 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4574 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4575 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4576 if (local_size != pcbddc->local_primal_size) { 4577 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4578 } 4579 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4580 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4581 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4582 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4583 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4584 4585 /* check numbering */ 4586 if (pcbddc->dbg_flag) { 4587 PetscScalar coarsesum,*array; 4588 PetscInt i; 4589 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4590 4591 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4592 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4593 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4594 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4595 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4596 for (i=0;i<pcbddc->local_primal_size;i++) { 4597 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4598 } 4599 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4600 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4601 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4602 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4603 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4604 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4605 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4606 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4607 for (i=0;i<pcis->n;i++) { 4608 if (array[i] == 1.0) { 4609 set_error = PETSC_TRUE; 4610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4611 } 4612 } 4613 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4614 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4615 for (i=0;i<pcis->n;i++) { 4616 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4617 } 4618 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4619 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4620 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4621 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4622 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4623 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4624 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4625 PetscInt *gidxs; 4626 4627 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4628 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4629 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4630 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4632 for (i=0;i<pcbddc->local_primal_size;i++) { 4633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4634 } 4635 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4636 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4637 } 4638 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4639 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4640 } 4641 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4642 /* get back data */ 4643 *coarse_size_n = coarse_size; 4644 *local_primal_indices_n = local_primal_indices; 4645 PetscFunctionReturn(0); 4646 } 4647 4648 #undef __FUNCT__ 4649 #define __FUNCT__ "PCBDDCGlobalToLocal" 4650 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4651 { 4652 IS localis_t; 4653 PetscInt i,lsize,*idxs,n; 4654 PetscScalar *vals; 4655 PetscErrorCode ierr; 4656 4657 PetscFunctionBegin; 4658 /* get indices in local ordering exploiting local to global map */ 4659 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4660 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4661 for (i=0;i<lsize;i++) vals[i] = 1.0; 4662 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4663 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4664 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4665 if (idxs) { /* multilevel guard */ 4666 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4667 } 4668 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4669 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4670 ierr = PetscFree(vals);CHKERRQ(ierr); 4671 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4672 /* now compute set in local ordering */ 4673 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4674 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4675 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4676 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4677 for (i=0,lsize=0;i<n;i++) { 4678 if (PetscRealPart(vals[i]) > 0.5) { 4679 lsize++; 4680 } 4681 } 4682 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4683 for (i=0,lsize=0;i<n;i++) { 4684 if (PetscRealPart(vals[i]) > 0.5) { 4685 idxs[lsize++] = i; 4686 } 4687 } 4688 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4689 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4690 *localis = localis_t; 4691 PetscFunctionReturn(0); 4692 } 4693 4694 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4695 #undef __FUNCT__ 4696 #define __FUNCT__ "PCBDDCMatMult_Private" 4697 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4698 { 4699 PCBDDCChange_ctx change_ctx; 4700 PetscErrorCode ierr; 4701 4702 PetscFunctionBegin; 4703 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4704 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4705 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4706 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4707 PetscFunctionReturn(0); 4708 } 4709 4710 #undef __FUNCT__ 4711 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4712 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4713 { 4714 PCBDDCChange_ctx change_ctx; 4715 PetscErrorCode ierr; 4716 4717 PetscFunctionBegin; 4718 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4719 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4720 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4721 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4722 PetscFunctionReturn(0); 4723 } 4724 4725 #undef __FUNCT__ 4726 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4727 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4728 { 4729 PC_IS *pcis=(PC_IS*)pc->data; 4730 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4731 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4732 Mat S_j; 4733 PetscInt *used_xadj,*used_adjncy; 4734 PetscBool free_used_adj; 4735 PetscErrorCode ierr; 4736 4737 PetscFunctionBegin; 4738 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4739 free_used_adj = PETSC_FALSE; 4740 if (pcbddc->sub_schurs_layers == -1) { 4741 used_xadj = NULL; 4742 used_adjncy = NULL; 4743 } else { 4744 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4745 used_xadj = pcbddc->mat_graph->xadj; 4746 used_adjncy = pcbddc->mat_graph->adjncy; 4747 } else if (pcbddc->computed_rowadj) { 4748 used_xadj = pcbddc->mat_graph->xadj; 4749 used_adjncy = pcbddc->mat_graph->adjncy; 4750 } else { 4751 PetscBool flg_row=PETSC_FALSE; 4752 const PetscInt *xadj,*adjncy; 4753 PetscInt nvtxs; 4754 4755 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4756 if (flg_row) { 4757 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4758 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4759 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4760 free_used_adj = PETSC_TRUE; 4761 } else { 4762 pcbddc->sub_schurs_layers = -1; 4763 used_xadj = NULL; 4764 used_adjncy = NULL; 4765 } 4766 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4767 } 4768 } 4769 4770 /* setup sub_schurs data */ 4771 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4772 if (!sub_schurs->use_mumps) { 4773 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4774 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4775 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4776 } else { 4777 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4778 if (!pcbddc->use_vertices && reuse_solvers) { 4779 PetscInt n_vertices; 4780 4781 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4782 reuse_solvers = (PetscBool)!n_vertices; 4783 } 4784 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4785 } 4786 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4787 4788 /* free adjacency */ 4789 if (free_used_adj) { 4790 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4791 } 4792 PetscFunctionReturn(0); 4793 } 4794 4795 #undef __FUNCT__ 4796 #define __FUNCT__ "PCBDDCInitSubSchurs" 4797 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4798 { 4799 PC_IS *pcis=(PC_IS*)pc->data; 4800 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4801 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4802 PCBDDCGraph graph; 4803 PetscErrorCode ierr; 4804 4805 PetscFunctionBegin; 4806 /* attach interface graph for determining subsets */ 4807 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4808 IS verticesIS,verticescomm; 4809 PetscInt vsize,*idxs; 4810 4811 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4812 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4813 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4814 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4815 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4816 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4817 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4818 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4819 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4820 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4821 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4822 /* 4823 if (pcbddc->dbg_flag) { 4824 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4825 } 4826 */ 4827 } else { 4828 graph = pcbddc->mat_graph; 4829 } 4830 4831 /* sub_schurs init */ 4832 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4833 4834 /* free graph struct */ 4835 if (pcbddc->sub_schurs_rebuild) { 4836 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4837 } 4838 PetscFunctionReturn(0); 4839 } 4840