1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 thresh = 1.0; 92 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 93 #if defined(PETSC_USE_COMPLEX) 94 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 95 #else 96 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 97 #endif 98 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 99 ierr = PetscFPTrapPop();CHKERRQ(ierr); 100 } else { 101 /* TODO */ 102 } 103 } else { 104 lwork = 0; 105 } 106 107 nv = 0; 108 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 109 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 110 } 111 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 112 if (allocated_S_St) { 113 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 114 } 115 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 116 #if defined(PETSC_USE_COMPLEX) 117 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 118 #endif 119 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 121 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 122 nv+cum,&pcbddc->adaptive_constraints_idxs, 123 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 124 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 125 126 maxneigs = 0; 127 cum = cum2 = cumarray = 0; 128 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 129 pcbddc->adaptive_constraints_data_ptr[0] = 0; 130 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 131 const PetscInt *idxs; 132 133 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 134 for (cum=0;cum<nv;cum++) { 135 pcbddc->adaptive_constraints_n[cum] = 1; 136 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 137 pcbddc->adaptive_constraints_data[cum] = 1.0; 138 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 139 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 140 } 141 cum2 = cum; 142 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 143 } 144 145 if (mss) { /* multilevel */ 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 147 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 148 } 149 150 for (i=0;i<sub_schurs->n_subs;i++) { 151 152 const PetscInt *idxs; 153 PetscReal infty = PETSC_MAX_REAL; 154 PetscInt j,subset_size,eigs_start = 0; 155 PetscBLASInt B_N; 156 PetscBool same_data = PETSC_FALSE; 157 158 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 159 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 160 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 161 if (sub_schurs->is_hermitian) { 162 PetscInt j,k; 163 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 164 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 for (j=0;j<subset_size;j++) { 168 for (k=j;k<subset_size;k++) { 169 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 170 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 171 } 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 183 /* see if we can save some work */ 184 if (sub_schurs->n_subs == 1) { 185 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 186 } 187 188 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 189 B_neigs = 0; 190 } else { 191 /* Threshold: this is an heuristic for edges */ 192 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 193 194 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 195 PetscBLASInt B_itype = 1; 196 PetscBLASInt B_IL, B_IU; 197 PetscReal eps = -1.0; /* dlamch? */ 198 PetscInt nmin_s; 199 200 /* ask for eigenvalues larger than thresh */ 201 if (pcbddc->dbg_flag) { 202 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 203 } 204 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 205 #if defined(PETSC_USE_COMPLEX) 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 207 #else 208 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 209 #endif 210 ierr = PetscFPTrapPop();CHKERRQ(ierr); 211 if (B_ierr) { 212 if (B_ierr < 0 ) { 213 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 214 } else if (B_ierr <= B_N) { 215 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 216 } else { 217 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 218 } 219 } 220 221 if (B_neigs > nmax) { 222 if (pcbddc->dbg_flag) { 223 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 224 } 225 eigs_start = B_neigs -nmax; 226 B_neigs = nmax; 227 } 228 229 nmin_s = PetscMin(nmin,B_N); 230 if (B_neigs < nmin_s) { 231 PetscBLASInt B_neigs2; 232 233 B_IU = B_N - B_neigs; 234 B_IL = B_N - nmin_s + 1; 235 if (pcbddc->dbg_flag) { 236 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 237 } 238 if (sub_schurs->is_hermitian) { 239 PetscInt j; 240 for (j=0;j<subset_size;j++) { 241 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 242 } 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 } else { 247 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 248 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 249 } 250 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 251 #if defined(PETSC_USE_COMPLEX) 252 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 253 #else 254 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 255 #endif 256 ierr = PetscFPTrapPop();CHKERRQ(ierr); 257 B_neigs += B_neigs2; 258 } 259 if (B_ierr) { 260 if (B_ierr < 0 ) { 261 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 262 } else if (B_ierr <= B_N) { 263 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 264 } else { 265 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 266 } 267 } 268 if (pcbddc->dbg_flag) { 269 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 270 for (j=0;j<B_neigs;j++) { 271 if (eigs[j] == 0.0) { 272 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 273 } else { 274 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 275 } 276 } 277 } 278 } else { 279 /* TODO */ 280 } 281 } 282 maxneigs = PetscMax(B_neigs,maxneigs); 283 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 284 if (B_neigs) { 285 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 286 287 if (pcbddc->dbg_flag > 1) { 288 PetscInt ii; 289 for (ii=0;ii<B_neigs;ii++) { 290 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 291 for (j=0;j<B_N;j++) { 292 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 293 } 294 } 295 } 296 #if 0 297 for (j=0;j<B_neigs;j++) { 298 PetscBLASInt Blas_N,Blas_one = 1.0; 299 PetscScalar norm; 300 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 301 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 302 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 303 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 304 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 305 } else { 306 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 307 } 308 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 309 } 310 #endif 311 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 312 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 313 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 314 cum++; 315 } 316 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 317 /* shift for next computation */ 318 cumarray += subset_size*subset_size; 319 } 320 if (pcbddc->dbg_flag) { 321 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 322 } 323 324 if (mss) { 325 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 326 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 327 /* destroy matrices (junk) */ 328 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 329 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 330 } 331 if (allocated_S_St) { 332 ierr = PetscFree2(S,St);CHKERRQ(ierr); 333 } 334 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 335 #if defined(PETSC_USE_COMPLEX) 336 ierr = PetscFree(rwork);CHKERRQ(ierr); 337 #endif 338 if (pcbddc->dbg_flag) { 339 PetscInt maxneigs_r; 340 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 341 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 342 } 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCSetUpSolvers" 348 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 349 { 350 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 351 PetscScalar *coarse_submat_vals; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 /* Setup local scatters R_to_B and (optionally) R_to_D */ 356 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 357 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 358 359 /* Setup local neumann solver ksp_R */ 360 /* PCBDDCSetUpLocalScatters should be called first! */ 361 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 362 363 /* Change global null space passed in by the user if change of basis has been requested */ 364 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 365 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 366 } 367 368 /* 369 Setup local correction and local part of coarse basis. 370 Gives back the dense local part of the coarse matrix in column major ordering 371 */ 372 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 373 374 /* Compute total number of coarse nodes and setup coarse solver */ 375 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 376 377 /* free */ 378 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "PCBDDCResetCustomization" 384 PetscErrorCode PCBDDCResetCustomization(PC pc) 385 { 386 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 387 PetscErrorCode ierr; 388 389 PetscFunctionBegin; 390 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 392 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 393 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 394 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 395 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 396 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 397 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 398 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 399 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 400 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "PCBDDCResetTopography" 406 PetscErrorCode PCBDDCResetTopography(PC pc) 407 { 408 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 409 PetscErrorCode ierr; 410 411 PetscFunctionBegin; 412 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 413 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 414 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 415 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 416 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "PCBDDCResetSolvers" 422 PetscErrorCode PCBDDCResetSolvers(PC pc) 423 { 424 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 425 PetscScalar *array; 426 PetscErrorCode ierr; 427 428 PetscFunctionBegin; 429 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 430 if (pcbddc->coarse_phi_B) { 431 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 432 ierr = PetscFree(array);CHKERRQ(ierr); 433 } 434 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 435 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 436 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 437 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 438 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 440 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 442 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 443 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 444 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 445 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 446 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 447 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 448 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 449 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 450 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 451 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 452 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 453 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 454 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 455 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 456 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 462 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 463 { 464 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 465 PC_IS *pcis = (PC_IS*)pc->data; 466 VecType impVecType; 467 PetscInt n_constraints,n_R,old_size; 468 PetscErrorCode ierr; 469 470 PetscFunctionBegin; 471 if (!pcbddc->ConstraintMatrix) { 472 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 473 } 474 /* get sizes */ 475 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 476 n_R = pcis->n-pcbddc->n_vertices; 477 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 478 /* local work vectors (try to avoid unneeded work)*/ 479 /* R nodes */ 480 old_size = -1; 481 if (pcbddc->vec1_R) { 482 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 483 } 484 if (n_R != old_size) { 485 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 486 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 487 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 488 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 489 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 490 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 491 } 492 /* local primal dofs */ 493 old_size = -1; 494 if (pcbddc->vec1_P) { 495 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 496 } 497 if (pcbddc->local_primal_size != old_size) { 498 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 499 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 500 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 501 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 502 } 503 /* local explicit constraints */ 504 old_size = -1; 505 if (pcbddc->vec1_C) { 506 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 507 } 508 if (n_constraints && n_constraints != old_size) { 509 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 510 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 511 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 512 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 513 } 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "PCBDDCSetUpCorrection" 519 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 520 { 521 PetscErrorCode ierr; 522 /* pointers to pcis and pcbddc */ 523 PC_IS* pcis = (PC_IS*)pc->data; 524 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 525 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 526 /* submatrices of local problem */ 527 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 528 /* submatrices of local coarse problem */ 529 Mat S_VV,S_CV,S_VC,S_CC; 530 /* working matrices */ 531 Mat C_CR; 532 /* additional working stuff */ 533 PC pc_R; 534 Mat F; 535 PetscBool isLU,isCHOL,isILU; 536 537 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 538 PetscScalar *work; 539 PetscInt *idx_V_B; 540 PetscInt n,n_vertices,n_constraints; 541 PetscInt i,n_R,n_D,n_B; 542 PetscBool unsymmetric_check; 543 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 544 MatType impMatType; 545 /* some shortcuts to scalars */ 546 PetscScalar one=1.0,m_one=-1.0; 547 548 PetscFunctionBegin; 549 n_vertices = pcbddc->n_vertices; 550 n_constraints = pcbddc->local_primal_size-n_vertices; 551 /* Set Non-overlapping dimensions */ 552 n_B = pcis->n_B; 553 n_D = pcis->n - n_B; 554 n_R = pcis->n - n_vertices; 555 556 /* Set types for local objects needed by BDDC precondtioner */ 557 impMatType = MATSEQDENSE; 558 559 /* vertices in boundary numbering */ 560 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 561 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 562 if (i != n_vertices) { 563 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 564 } 565 566 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 567 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 568 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 569 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 571 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 572 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 573 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 575 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 576 577 unsymmetric_check = PETSC_FALSE; 578 /* allocate workspace */ 579 n = 0; 580 if (n_constraints) { 581 n += n_R*n_constraints; 582 } 583 if (n_vertices) { 584 n = PetscMax(2*n_R*n_vertices,n); 585 n = PetscMax((n_R+n_B)*n_vertices,n); 586 } 587 if (!pcbddc->symmetric_primal) { 588 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 589 unsymmetric_check = PETSC_TRUE; 590 } 591 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 592 593 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 594 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 595 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 596 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 597 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 598 if (isLU || isILU || isCHOL) { 599 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 600 } else if (sub_schurs->reuse_mumps) { 601 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 602 MatFactorType type; 603 604 F = reuse_mumps->F; 605 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 606 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 607 } else { 608 F = NULL; 609 } 610 611 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 612 if (n_constraints) { 613 Mat M1,M2,M3; 614 Mat auxmat; 615 IS is_aux; 616 PetscScalar *array,*array2; 617 618 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 619 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 620 621 /* Extract constraints on R nodes: C_{CR} */ 622 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 623 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 624 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 625 626 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 627 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 628 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 629 for (i=0;i<n_constraints;i++) { 630 const PetscScalar *row_cmat_values; 631 const PetscInt *row_cmat_indices; 632 PetscInt size_of_constraint,j; 633 634 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 635 for (j=0;j<size_of_constraint;j++) { 636 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 637 } 638 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 639 } 640 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 641 if (F) { 642 Mat B; 643 644 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 645 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 646 ierr = MatDestroy(&B);CHKERRQ(ierr); 647 } else { 648 PetscScalar *marr; 649 650 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 651 for (i=0;i<n_constraints;i++) { 652 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 653 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 654 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 655 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 656 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 657 } 658 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 659 } 660 if (!pcbddc->switch_static) { 661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 662 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 663 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 664 for (i=0;i<n_constraints;i++) { 665 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 666 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 667 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 669 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 670 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 671 } 672 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 673 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 674 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 675 } else { 676 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 677 pcbddc->local_auxmat2 = local_auxmat2_R; 678 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 679 } 680 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 681 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 682 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 683 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 684 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 685 if (isCHOL) { 686 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 687 } else { 688 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 689 } 690 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 691 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 692 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 693 ierr = MatDestroy(&M2);CHKERRQ(ierr); 694 ierr = MatDestroy(&M3);CHKERRQ(ierr); 695 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 696 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 697 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 698 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 699 ierr = MatDestroy(&M1);CHKERRQ(ierr); 700 } 701 /* Get submatrices from subdomain matrix */ 702 if (n_vertices) { 703 Mat newmat; 704 IS is_aux; 705 PetscInt ibs,mbs; 706 PetscBool issbaij; 707 708 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 709 IS tis; 710 711 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 712 ierr = ISSort(tis);CHKERRQ(ierr); 713 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 714 ierr = ISDestroy(&tis);CHKERRQ(ierr); 715 } else { 716 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 717 } 718 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 719 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 720 if (ibs != mbs) { /* need to convert to SEQAIJ */ 721 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 722 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 723 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 724 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 725 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 726 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 727 } else { 728 /* this is safe */ 729 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 730 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 731 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 732 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 733 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 734 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 735 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 736 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 737 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 738 } else { 739 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 740 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 741 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 742 } 743 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 744 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 745 } 746 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 747 } 748 749 /* Matrix of coarse basis functions (local) */ 750 if (pcbddc->coarse_phi_B) { 751 PetscInt on_B,on_primal,on_D=n_D; 752 if (pcbddc->coarse_phi_D) { 753 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 754 } 755 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 756 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 757 PetscScalar *marray; 758 759 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 760 ierr = PetscFree(marray);CHKERRQ(ierr); 761 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 762 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 763 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 764 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 765 } 766 } 767 768 if (!pcbddc->coarse_phi_B) { 769 PetscScalar *marray; 770 771 n = n_B*pcbddc->local_primal_size; 772 if (pcbddc->switch_static || pcbddc->dbg_flag) { 773 n += n_D*pcbddc->local_primal_size; 774 } 775 if (!pcbddc->symmetric_primal) { 776 n *= 2; 777 } 778 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 780 n = n_B*pcbddc->local_primal_size; 781 if (pcbddc->switch_static || pcbddc->dbg_flag) { 782 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 783 n += n_D*pcbddc->local_primal_size; 784 } 785 if (!pcbddc->symmetric_primal) { 786 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 787 if (pcbddc->switch_static || pcbddc->dbg_flag) { 788 n = n_B*pcbddc->local_primal_size; 789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 793 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 794 if (pcbddc->switch_static || pcbddc->dbg_flag) { 795 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 796 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 797 } 798 } 799 } 800 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 801 /* vertices */ 802 if (n_vertices) { 803 804 if (n_R) { 805 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 806 PetscBLASInt B_N,B_one = 1; 807 PetscScalar *x,*y; 808 809 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 810 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 811 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 812 if (F) { /* TODO could be optimized for symmetric problems */ 813 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 814 } else { 815 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 816 for (i=0;i<n_vertices;i++) { 817 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 818 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 819 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 820 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 821 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 822 } 823 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 824 } 825 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 826 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 827 if (n_constraints) { 828 Mat B; 829 830 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 831 for (i=0;i<n_vertices;i++) { 832 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 837 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 838 } 839 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 840 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 841 ierr = MatDestroy(&B);CHKERRQ(ierr); 842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 843 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 844 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 845 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 846 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 847 ierr = MatDestroy(&B);CHKERRQ(ierr); 848 } 849 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 850 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 851 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 852 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 853 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 854 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 855 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 856 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 857 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 858 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 859 } else { 860 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 861 } 862 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 863 /* coarse basis functions */ 864 for (i=0;i<n_vertices;i++) { 865 PetscScalar *y; 866 867 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 868 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 869 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 870 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 y[n_B*i+idx_V_B[i]] = 1.0; 873 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 874 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 875 876 if (pcbddc->switch_static || pcbddc->dbg_flag) { 877 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 878 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 879 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 882 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 883 } 884 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 885 } 886 } 887 888 if (n_constraints) { 889 Mat B; 890 891 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 892 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 893 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 894 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 895 if (n_vertices) { 896 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 897 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 898 } else { 899 Mat S_VCt; 900 901 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 902 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 903 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 904 } 905 } 906 ierr = MatDestroy(&B);CHKERRQ(ierr); 907 /* coarse basis functions */ 908 for (i=0;i<n_constraints;i++) { 909 PetscScalar *y; 910 911 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 912 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 917 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 918 if (pcbddc->switch_static || pcbddc->dbg_flag) { 919 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 920 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 921 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 924 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 925 } 926 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 927 } 928 } 929 if (n_constraints) { 930 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 931 } 932 933 /* compute other basis functions for non-symmetric problems */ 934 if (!pcbddc->symmetric_primal) { 935 936 if (n_constraints) { 937 Mat S_CCT,B_C; 938 939 /* this is a lazy thing */ 940 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 942 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 943 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 944 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 945 if (n_vertices) { 946 Mat B_V,S_VCT; 947 948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 949 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 950 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 951 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 952 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 953 } 954 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 955 } else { /* if there are no constraints, reset work */ 956 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 957 } 958 if (n_vertices && n_R) { 959 Mat A_VRT; 960 PetscScalar *marray; 961 PetscBLASInt B_N,B_one = 1; 962 963 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 964 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 965 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 966 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 967 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 968 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 969 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 970 } 971 972 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 973 for (i=0;i<pcbddc->local_primal_size;i++) { 974 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 975 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 976 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 977 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 978 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 979 } 980 } else { 981 for (i=0;i<pcbddc->local_primal_size;i++) { 982 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 983 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 984 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 985 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 986 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 987 } 988 } 989 /* coarse basis functions */ 990 for (i=0;i<pcbddc->local_primal_size;i++) { 991 PetscScalar *y; 992 993 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 994 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 995 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 996 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 if (i<n_vertices) { 999 y[n_B*i+idx_V_B[i]] = 1.0; 1000 } 1001 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1002 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1003 1004 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1005 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1006 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1007 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1010 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1011 } 1012 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1013 } 1014 } 1015 /* free memory */ 1016 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1017 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1018 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1019 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1020 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1021 ierr = PetscFree(work);CHKERRQ(ierr); 1022 if (n_vertices) { 1023 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1024 } 1025 if (n_constraints) { 1026 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1027 } 1028 /* Checking coarse_sub_mat and coarse basis functios */ 1029 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1030 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1031 if (pcbddc->dbg_flag) { 1032 Mat coarse_sub_mat; 1033 Mat AUXMAT,TM1,TM2,TM3,TM4; 1034 Mat coarse_phi_D,coarse_phi_B; 1035 Mat coarse_psi_D,coarse_psi_B; 1036 Mat A_II,A_BB,A_IB,A_BI; 1037 Mat C_B,CPHI; 1038 IS is_dummy; 1039 Vec mones; 1040 MatType checkmattype=MATSEQAIJ; 1041 PetscReal real_value; 1042 1043 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1044 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1045 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1046 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1047 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1048 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1049 if (unsymmetric_check) { 1050 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1051 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1052 } 1053 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1054 1055 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1056 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1057 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1058 if (unsymmetric_check) { 1059 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1060 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1061 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1063 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1064 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1066 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1067 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1069 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1070 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1071 } else { 1072 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1073 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1074 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1075 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1076 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1077 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1078 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1079 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1080 } 1081 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1082 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1083 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1084 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1085 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1086 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1087 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1089 1090 /* check constraints */ 1091 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1092 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1093 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1094 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1095 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1096 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1097 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1098 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1099 if (unsymmetric_check) { 1100 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1101 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1102 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1103 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1105 } 1106 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1107 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1108 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1109 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1110 1111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1112 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1113 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1114 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1115 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1116 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1117 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1118 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1119 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1120 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1121 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1122 if (unsymmetric_check) { 1123 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1124 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1125 } 1126 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1127 } 1128 /* get back data */ 1129 *coarse_submat_vals_n = coarse_submat_vals; 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1135 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1136 { 1137 Mat *work_mat; 1138 IS isrow_s,iscol_s; 1139 PetscBool rsorted,csorted; 1140 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1141 PetscErrorCode ierr; 1142 1143 PetscFunctionBegin; 1144 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1145 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1146 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1147 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1148 1149 if (!rsorted) { 1150 const PetscInt *idxs; 1151 PetscInt *idxs_sorted,i; 1152 1153 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1154 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1155 for (i=0;i<rsize;i++) { 1156 idxs_perm_r[i] = i; 1157 } 1158 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1159 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1160 for (i=0;i<rsize;i++) { 1161 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1162 } 1163 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1164 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1165 } else { 1166 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1167 isrow_s = isrow; 1168 } 1169 1170 if (!csorted) { 1171 if (isrow == iscol) { 1172 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1173 iscol_s = isrow_s; 1174 } else { 1175 const PetscInt *idxs; 1176 PetscInt *idxs_sorted,i; 1177 1178 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1179 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1180 for (i=0;i<csize;i++) { 1181 idxs_perm_c[i] = i; 1182 } 1183 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1184 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1185 for (i=0;i<csize;i++) { 1186 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1187 } 1188 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1189 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1190 } 1191 } else { 1192 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1193 iscol_s = iscol; 1194 } 1195 1196 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1197 1198 if (!rsorted || !csorted) { 1199 Mat new_mat; 1200 IS is_perm_r,is_perm_c; 1201 1202 if (!rsorted) { 1203 PetscInt *idxs_r,i; 1204 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1205 for (i=0;i<rsize;i++) { 1206 idxs_r[idxs_perm_r[i]] = i; 1207 } 1208 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1209 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1210 } else { 1211 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1212 } 1213 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1214 1215 if (!csorted) { 1216 if (isrow_s == iscol_s) { 1217 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1218 is_perm_c = is_perm_r; 1219 } else { 1220 PetscInt *idxs_c,i; 1221 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1222 for (i=0;i<csize;i++) { 1223 idxs_c[idxs_perm_c[i]] = i; 1224 } 1225 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1226 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1227 } 1228 } else { 1229 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1230 } 1231 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1232 1233 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1234 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1235 work_mat[0] = new_mat; 1236 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1237 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1238 } 1239 1240 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1241 *B = work_mat[0]; 1242 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1243 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1244 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1250 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1251 { 1252 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1253 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1254 Mat new_mat; 1255 IS is_local,is_global; 1256 PetscInt local_size; 1257 PetscBool isseqaij; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1262 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1263 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1264 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1265 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1266 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1267 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1268 1269 /* check */ 1270 if (pcbddc->dbg_flag) { 1271 Vec x,x_change; 1272 PetscReal error; 1273 1274 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1275 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1276 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1277 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1279 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1280 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1281 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1282 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1283 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1286 ierr = VecDestroy(&x);CHKERRQ(ierr); 1287 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1288 } 1289 1290 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1291 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1292 if (isseqaij) { 1293 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1294 } else { 1295 Mat work_mat; 1296 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1297 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1298 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1299 } 1300 if (matis->A->symmetric_set) { 1301 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1302 #if !defined(PETSC_USE_COMPLEX) 1303 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1304 #endif 1305 } 1306 /* 1307 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1308 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1309 */ 1310 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1311 PetscFunctionReturn(0); 1312 } 1313 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1316 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1317 { 1318 PC_IS* pcis = (PC_IS*)(pc->data); 1319 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1320 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1321 PetscInt *idx_R_local=NULL; 1322 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1323 PetscInt vbs,bs; 1324 PetscBT bitmask=NULL; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 /* 1329 No need to setup local scatters if 1330 - primal space is unchanged 1331 AND 1332 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1333 AND 1334 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1335 */ 1336 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1337 PetscFunctionReturn(0); 1338 } 1339 /* destroy old objects */ 1340 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1341 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1342 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1343 /* Set Non-overlapping dimensions */ 1344 n_B = pcis->n_B; 1345 n_D = pcis->n - n_B; 1346 n_vertices = pcbddc->n_vertices; 1347 1348 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1349 1350 /* create auxiliary bitmask and allocate workspace */ 1351 if (!sub_schurs->reuse_mumps) { 1352 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1353 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1354 for (i=0;i<n_vertices;i++) { 1355 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1356 } 1357 1358 for (i=0, n_R=0; i<pcis->n; i++) { 1359 if (!PetscBTLookup(bitmask,i)) { 1360 idx_R_local[n_R++] = i; 1361 } 1362 } 1363 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1364 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1365 1366 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1367 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1368 } 1369 1370 /* Block code */ 1371 vbs = 1; 1372 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1373 if (bs>1 && !(n_vertices%bs)) { 1374 PetscBool is_blocked = PETSC_TRUE; 1375 PetscInt *vary; 1376 if (!sub_schurs->reuse_mumps) { 1377 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1378 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1379 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1380 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1381 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1382 for (i=0; i<pcis->n/bs; i++) { 1383 if (vary[i]!=0 && vary[i]!=bs) { 1384 is_blocked = PETSC_FALSE; 1385 break; 1386 } 1387 } 1388 ierr = PetscFree(vary);CHKERRQ(ierr); 1389 } else { 1390 /* Verify directly the R set */ 1391 for (i=0; i<n_R/bs; i++) { 1392 PetscInt j,node=idx_R_local[bs*i]; 1393 for (j=1; j<bs; j++) { 1394 if (node != idx_R_local[bs*i+j]-j) { 1395 is_blocked = PETSC_FALSE; 1396 break; 1397 } 1398 } 1399 } 1400 } 1401 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1402 vbs = bs; 1403 for (i=0;i<n_R/vbs;i++) { 1404 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1405 } 1406 } 1407 } 1408 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1409 if (sub_schurs->reuse_mumps) { 1410 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1411 1412 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1413 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1414 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1415 reuse_mumps->is_R = pcbddc->is_R_local; 1416 } else { 1417 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1418 } 1419 1420 /* print some info if requested */ 1421 if (pcbddc->dbg_flag) { 1422 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1423 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1424 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1425 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1426 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1427 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1428 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1429 } 1430 1431 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1432 if (!sub_schurs->reuse_mumps) { 1433 IS is_aux1,is_aux2; 1434 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1435 1436 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1437 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1438 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1439 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1440 for (i=0; i<n_D; i++) { 1441 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1442 } 1443 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1444 for (i=0, j=0; i<n_R; i++) { 1445 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1446 aux_array1[j++] = i; 1447 } 1448 } 1449 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1450 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1451 for (i=0, j=0; i<n_B; i++) { 1452 if (!PetscBTLookup(bitmask,is_indices[i])) { 1453 aux_array2[j++] = i; 1454 } 1455 } 1456 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1457 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1458 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1459 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1460 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1461 1462 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1463 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1464 for (i=0, j=0; i<n_R; i++) { 1465 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1466 aux_array1[j++] = i; 1467 } 1468 } 1469 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1470 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1471 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1472 } 1473 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1474 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1475 } else { 1476 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1477 IS tis; 1478 PetscInt schur_size; 1479 1480 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1481 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1482 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1483 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1484 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1485 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1486 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1487 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1488 } 1489 } 1490 PetscFunctionReturn(0); 1491 } 1492 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1496 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1497 { 1498 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1499 PC_IS *pcis = (PC_IS*)pc->data; 1500 PC pc_temp; 1501 Mat A_RR; 1502 MatReuse reuse; 1503 PetscScalar m_one = -1.0; 1504 PetscReal value; 1505 PetscInt n_D,n_R,ibs,mbs; 1506 PetscBool use_exact,use_exact_reduced,issbaij; 1507 PetscErrorCode ierr; 1508 /* prefixes stuff */ 1509 char dir_prefix[256],neu_prefix[256],str_level[16]; 1510 size_t len; 1511 1512 PetscFunctionBegin; 1513 1514 /* compute prefixes */ 1515 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1516 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1517 if (!pcbddc->current_level) { 1518 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1519 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1520 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1521 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1522 } else { 1523 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1524 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1525 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1526 len -= 15; /* remove "pc_bddc_coarse_" */ 1527 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1528 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1529 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1530 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1531 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1532 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1533 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1534 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1535 } 1536 1537 /* DIRICHLET PROBLEM */ 1538 if (dirichlet) { 1539 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1540 if (pcbddc->local_mat->symmetric_set) { 1541 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1542 } 1543 /* Matrix for Dirichlet problem is pcis->A_II */ 1544 n_D = pcis->n - pcis->n_B; 1545 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1546 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1547 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1548 /* default */ 1549 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1550 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1551 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1552 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1553 if (issbaij) { 1554 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1555 } else { 1556 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1557 } 1558 /* Allow user's customization */ 1559 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1560 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1561 } 1562 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1563 if (sub_schurs->reuse_mumps) { 1564 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1565 1566 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1567 } 1568 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1569 if (!n_D) { 1570 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1571 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1572 } 1573 /* Set Up KSP for Dirichlet problem of BDDC */ 1574 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1575 /* set ksp_D into pcis data */ 1576 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1577 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1578 pcis->ksp_D = pcbddc->ksp_D; 1579 } 1580 1581 /* NEUMANN PROBLEM */ 1582 A_RR = 0; 1583 if (neumann) { 1584 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1585 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1586 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1587 if (pcbddc->ksp_R) { /* already created ksp */ 1588 PetscInt nn_R; 1589 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1590 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1591 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1592 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1593 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1594 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1595 reuse = MAT_INITIAL_MATRIX; 1596 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1597 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1598 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1599 reuse = MAT_INITIAL_MATRIX; 1600 } else { /* safe to reuse the matrix */ 1601 reuse = MAT_REUSE_MATRIX; 1602 } 1603 } 1604 /* last check */ 1605 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1606 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1607 reuse = MAT_INITIAL_MATRIX; 1608 } 1609 } else { /* first time, so we need to create the matrix */ 1610 reuse = MAT_INITIAL_MATRIX; 1611 } 1612 /* extract A_RR */ 1613 if (!sub_schurs->reuse_mumps) { 1614 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1615 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1616 if (ibs != mbs) { 1617 Mat newmat; 1618 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1619 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1620 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1621 } else { 1622 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1623 } 1624 if (pcbddc->local_mat->symmetric_set) { 1625 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1626 } 1627 } else { 1628 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1629 1630 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1631 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1632 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1633 } 1634 if (!pcbddc->ksp_R) { /* create object if not present */ 1635 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1636 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1637 /* default */ 1638 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1639 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1640 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1641 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1642 if (issbaij) { 1643 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1644 } else { 1645 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1646 } 1647 /* Allow user's customization */ 1648 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1649 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1650 } 1651 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1652 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1653 if (!n_R) { 1654 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1655 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1656 } 1657 /* Reuse MUMPS solver if it is present */ 1658 if (sub_schurs->reuse_mumps) { 1659 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1660 1661 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1662 } 1663 /* Set Up KSP for Neumann problem of BDDC */ 1664 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1665 } 1666 /* free Neumann problem's matrix */ 1667 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1668 1669 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1670 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1671 if (pcbddc->dbg_flag) { 1672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1673 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1674 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1675 } 1676 if (dirichlet) { /* Dirichlet */ 1677 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1678 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1679 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1680 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1681 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1682 /* need to be adapted? */ 1683 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1684 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1685 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1686 /* print info */ 1687 if (pcbddc->dbg_flag) { 1688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1689 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1690 } 1691 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1692 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1693 } 1694 } 1695 if (neumann) { /* Neumann */ 1696 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1697 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1698 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1699 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1700 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1701 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1702 /* need to be adapted? */ 1703 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1704 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1705 /* print info */ 1706 if (pcbddc->dbg_flag) { 1707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1708 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1709 } 1710 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1711 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1712 } 1713 } 1714 } 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1720 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1721 { 1722 PetscErrorCode ierr; 1723 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1724 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1725 1726 PetscFunctionBegin; 1727 if (!sub_schurs->reuse_mumps) { 1728 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1729 } 1730 if (!pcbddc->switch_static) { 1731 if (applytranspose && pcbddc->local_auxmat1) { 1732 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1733 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1734 } 1735 if (!sub_schurs->reuse_mumps) { 1736 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1737 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1738 } else { 1739 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1740 1741 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1742 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1743 } 1744 } else { 1745 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1746 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1747 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1748 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1749 if (applytranspose && pcbddc->local_auxmat1) { 1750 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1751 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1752 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1753 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1754 } 1755 } 1756 if (!sub_schurs->reuse_mumps) { 1757 if (applytranspose) { 1758 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1759 } else { 1760 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1761 } 1762 #if defined(PETSC_HAVE_MUMPS) 1763 } else { 1764 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1765 1766 if (applytranspose) { 1767 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1768 } else { 1769 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1770 } 1771 #endif 1772 } 1773 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1774 if (!pcbddc->switch_static) { 1775 if (!sub_schurs->reuse_mumps) { 1776 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1778 } else { 1779 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1780 1781 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1782 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1783 } 1784 if (!applytranspose && pcbddc->local_auxmat1) { 1785 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1786 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1787 } 1788 } else { 1789 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1790 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1791 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1792 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 if (!applytranspose && pcbddc->local_auxmat1) { 1794 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1795 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1796 } 1797 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1799 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1800 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 } 1802 PetscFunctionReturn(0); 1803 } 1804 1805 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1808 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1809 { 1810 PetscErrorCode ierr; 1811 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1812 PC_IS* pcis = (PC_IS*) (pc->data); 1813 const PetscScalar zero = 0.0; 1814 1815 PetscFunctionBegin; 1816 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1817 if (applytranspose) { 1818 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1819 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1820 } else { 1821 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1822 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1823 } 1824 /* start communications from local primal nodes to rhs of coarse solver */ 1825 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1826 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1827 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1828 1829 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1830 /* TODO remove null space when doing multilevel */ 1831 if (pcbddc->coarse_ksp) { 1832 Vec rhs,sol; 1833 1834 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1835 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1836 if (applytranspose) { 1837 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1838 } else { 1839 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1840 } 1841 } 1842 1843 /* Local solution on R nodes */ 1844 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1845 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1846 } 1847 1848 /* communications from coarse sol to local primal nodes */ 1849 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1850 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1851 1852 /* Sum contributions from two levels */ 1853 if (applytranspose) { 1854 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1855 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1856 } else { 1857 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1858 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1859 } 1860 PetscFunctionReturn(0); 1861 } 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1865 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1866 { 1867 PetscErrorCode ierr; 1868 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1869 PetscScalar *array; 1870 Vec from,to; 1871 1872 PetscFunctionBegin; 1873 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1874 from = pcbddc->coarse_vec; 1875 to = pcbddc->vec1_P; 1876 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1877 Vec tvec; 1878 1879 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1880 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1881 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1882 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1883 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1884 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1885 } 1886 } else { /* from local to global -> put data in coarse right hand side */ 1887 from = pcbddc->vec1_P; 1888 to = pcbddc->coarse_vec; 1889 } 1890 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1896 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1897 { 1898 PetscErrorCode ierr; 1899 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1900 PetscScalar *array; 1901 Vec from,to; 1902 1903 PetscFunctionBegin; 1904 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1905 from = pcbddc->coarse_vec; 1906 to = pcbddc->vec1_P; 1907 } else { /* from local to global -> put data in coarse right hand side */ 1908 from = pcbddc->vec1_P; 1909 to = pcbddc->coarse_vec; 1910 } 1911 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1912 if (smode == SCATTER_FORWARD) { 1913 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1914 Vec tvec; 1915 1916 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1917 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1918 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1919 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1920 } 1921 } else { 1922 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1923 ierr = VecResetArray(from);CHKERRQ(ierr); 1924 } 1925 } 1926 PetscFunctionReturn(0); 1927 } 1928 1929 /* uncomment for testing purposes */ 1930 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1933 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1934 { 1935 PetscErrorCode ierr; 1936 PC_IS* pcis = (PC_IS*)(pc->data); 1937 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1938 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1939 /* one and zero */ 1940 PetscScalar one=1.0,zero=0.0; 1941 /* space to store constraints and their local indices */ 1942 PetscScalar *constraints_data; 1943 PetscInt *constraints_idxs,*constraints_idxs_B; 1944 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1945 PetscInt *constraints_n; 1946 /* iterators */ 1947 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1948 /* BLAS integers */ 1949 PetscBLASInt lwork,lierr; 1950 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1951 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1952 /* reuse */ 1953 PetscInt olocal_primal_size,olocal_primal_size_cc; 1954 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1955 /* change of basis */ 1956 PetscBool qr_needed; 1957 PetscBT change_basis,qr_needed_idx; 1958 /* auxiliary stuff */ 1959 PetscInt *nnz,*is_indices; 1960 PetscInt ncc; 1961 /* some quantities */ 1962 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1963 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1964 1965 PetscFunctionBegin; 1966 /* Destroy Mat objects computed previously */ 1967 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1968 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1969 /* save info on constraints from previous setup (if any) */ 1970 olocal_primal_size = pcbddc->local_primal_size; 1971 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1972 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1973 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1974 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1975 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1976 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1977 1978 /* print some info */ 1979 if (pcbddc->dbg_flag) { 1980 IS vertices; 1981 PetscInt nv,nedges,nfaces; 1982 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1983 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1984 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1985 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1986 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1987 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1990 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1991 } 1992 1993 if (!pcbddc->adaptive_selection) { 1994 IS ISForVertices,*ISForFaces,*ISForEdges; 1995 MatNullSpace nearnullsp; 1996 const Vec *nearnullvecs; 1997 Vec *localnearnullsp; 1998 PetscScalar *array; 1999 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2000 PetscBool nnsp_has_cnst; 2001 /* LAPACK working arrays for SVD or POD */ 2002 PetscBool skip_lapack,boolforchange; 2003 PetscScalar *work; 2004 PetscReal *singular_vals; 2005 #if defined(PETSC_USE_COMPLEX) 2006 PetscReal *rwork; 2007 #endif 2008 #if defined(PETSC_MISSING_LAPACK_GESVD) 2009 PetscScalar *temp_basis,*correlation_mat; 2010 #else 2011 PetscBLASInt dummy_int=1; 2012 PetscScalar dummy_scalar=1.; 2013 #endif 2014 2015 /* Get index sets for faces, edges and vertices from graph */ 2016 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2017 /* free unneeded index sets */ 2018 if (!pcbddc->use_vertices) { 2019 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2020 } 2021 if (!pcbddc->use_edges) { 2022 for (i=0;i<n_ISForEdges;i++) { 2023 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2024 } 2025 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2026 n_ISForEdges = 0; 2027 } 2028 if (!pcbddc->use_faces) { 2029 for (i=0;i<n_ISForFaces;i++) { 2030 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2031 } 2032 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2033 n_ISForFaces = 0; 2034 } 2035 2036 #if defined(PETSC_USE_DEBUG) 2037 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2038 Also use_change_of_basis should be consistent among processors */ 2039 if (pcbddc->NullSpace) { 2040 PetscBool tbool[2],gbool[2]; 2041 2042 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2043 pcbddc->use_change_of_basis = PETSC_TRUE; 2044 if (!ISForEdges) { 2045 pcbddc->use_change_on_faces = PETSC_TRUE; 2046 } 2047 } 2048 tbool[0] = pcbddc->use_change_of_basis; 2049 tbool[1] = pcbddc->use_change_on_faces; 2050 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2051 pcbddc->use_change_of_basis = gbool[0]; 2052 pcbddc->use_change_on_faces = gbool[1]; 2053 } 2054 #endif 2055 2056 /* check if near null space is attached to global mat */ 2057 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2058 if (nearnullsp) { 2059 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2060 /* remove any stored info */ 2061 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2062 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2063 /* store information for BDDC solver reuse */ 2064 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2065 pcbddc->onearnullspace = nearnullsp; 2066 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2067 for (i=0;i<nnsp_size;i++) { 2068 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2069 } 2070 } else { /* if near null space is not provided BDDC uses constants by default */ 2071 nnsp_size = 0; 2072 nnsp_has_cnst = PETSC_TRUE; 2073 } 2074 /* get max number of constraints on a single cc */ 2075 max_constraints = nnsp_size; 2076 if (nnsp_has_cnst) max_constraints++; 2077 2078 /* 2079 Evaluate maximum storage size needed by the procedure 2080 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2081 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2082 There can be multiple constraints per connected component 2083 */ 2084 n_vertices = 0; 2085 if (ISForVertices) { 2086 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2087 } 2088 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2089 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2090 2091 total_counts = n_ISForFaces+n_ISForEdges; 2092 total_counts *= max_constraints; 2093 total_counts += n_vertices; 2094 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2095 2096 total_counts = 0; 2097 max_size_of_constraint = 0; 2098 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2099 IS used_is; 2100 if (i<n_ISForEdges) { 2101 used_is = ISForEdges[i]; 2102 } else { 2103 used_is = ISForFaces[i-n_ISForEdges]; 2104 } 2105 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2106 total_counts += j; 2107 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2108 } 2109 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2110 2111 /* get local part of global near null space vectors */ 2112 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2113 for (k=0;k<nnsp_size;k++) { 2114 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2115 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2117 } 2118 2119 /* whether or not to skip lapack calls */ 2120 skip_lapack = PETSC_TRUE; 2121 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2122 2123 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2124 if (!skip_lapack) { 2125 PetscScalar temp_work; 2126 2127 #if defined(PETSC_MISSING_LAPACK_GESVD) 2128 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2129 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2130 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2131 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2132 #if defined(PETSC_USE_COMPLEX) 2133 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2134 #endif 2135 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2136 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2137 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2138 lwork = -1; 2139 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2140 #if !defined(PETSC_USE_COMPLEX) 2141 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2142 #else 2143 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2144 #endif 2145 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2146 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2147 #else /* on missing GESVD */ 2148 /* SVD */ 2149 PetscInt max_n,min_n; 2150 max_n = max_size_of_constraint; 2151 min_n = max_constraints; 2152 if (max_size_of_constraint < max_constraints) { 2153 min_n = max_size_of_constraint; 2154 max_n = max_constraints; 2155 } 2156 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2157 #if defined(PETSC_USE_COMPLEX) 2158 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2159 #endif 2160 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2161 lwork = -1; 2162 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2163 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2164 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2165 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2166 #if !defined(PETSC_USE_COMPLEX) 2167 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2168 #else 2169 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2170 #endif 2171 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2172 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2173 #endif /* on missing GESVD */ 2174 /* Allocate optimal workspace */ 2175 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2176 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2177 } 2178 /* Now we can loop on constraining sets */ 2179 total_counts = 0; 2180 constraints_idxs_ptr[0] = 0; 2181 constraints_data_ptr[0] = 0; 2182 /* vertices */ 2183 if (n_vertices) { 2184 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2185 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2186 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2187 for (i=0;i<n_vertices;i++) { 2188 constraints_n[total_counts] = 1; 2189 constraints_data[total_counts] = 1.0; 2190 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2191 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2192 total_counts++; 2193 } 2194 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2195 PetscBool used_vertex; 2196 for (i=0;i<n_vertices;i++) { 2197 used_vertex = PETSC_FALSE; 2198 k = 0; 2199 while (!used_vertex && k<nnsp_size) { 2200 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2201 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2202 constraints_n[total_counts] = 1; 2203 constraints_idxs[total_counts] = is_indices[i]; 2204 constraints_data[total_counts] = 1.0; 2205 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2206 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2207 total_counts++; 2208 used_vertex = PETSC_TRUE; 2209 } 2210 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2211 k++; 2212 } 2213 } 2214 } 2215 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2216 n_vertices = total_counts; 2217 } 2218 2219 /* edges and faces */ 2220 total_counts_cc = total_counts; 2221 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2222 IS used_is; 2223 PetscBool idxs_copied = PETSC_FALSE; 2224 2225 if (ncc<n_ISForEdges) { 2226 used_is = ISForEdges[ncc]; 2227 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2228 } else { 2229 used_is = ISForFaces[ncc-n_ISForEdges]; 2230 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2231 } 2232 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2233 2234 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2235 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2236 /* change of basis should not be performed on local periodic nodes */ 2237 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2238 if (nnsp_has_cnst) { 2239 PetscScalar quad_value; 2240 2241 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2242 idxs_copied = PETSC_TRUE; 2243 2244 if (!pcbddc->use_nnsp_true) { 2245 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2246 } else { 2247 quad_value = 1.0; 2248 } 2249 for (j=0;j<size_of_constraint;j++) { 2250 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2251 } 2252 temp_constraints++; 2253 total_counts++; 2254 } 2255 for (k=0;k<nnsp_size;k++) { 2256 PetscReal real_value; 2257 PetscScalar *ptr_to_data; 2258 2259 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2260 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2261 for (j=0;j<size_of_constraint;j++) { 2262 ptr_to_data[j] = array[is_indices[j]]; 2263 } 2264 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2265 /* check if array is null on the connected component */ 2266 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2267 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2268 if (real_value > 0.0) { /* keep indices and values */ 2269 temp_constraints++; 2270 total_counts++; 2271 if (!idxs_copied) { 2272 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2273 idxs_copied = PETSC_TRUE; 2274 } 2275 } 2276 } 2277 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2278 valid_constraints = temp_constraints; 2279 if (!pcbddc->use_nnsp_true && temp_constraints) { 2280 if (temp_constraints == 1) { /* just normalize the constraint */ 2281 PetscScalar norm,*ptr_to_data; 2282 2283 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2284 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2285 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2286 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2287 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2288 } else { /* perform SVD */ 2289 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2290 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2291 2292 #if defined(PETSC_MISSING_LAPACK_GESVD) 2293 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2294 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2295 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2296 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2297 from that computed using LAPACKgesvd 2298 -> This is due to a different computation of eigenvectors in LAPACKheev 2299 -> The quality of the POD-computed basis will be the same */ 2300 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2301 /* Store upper triangular part of correlation matrix */ 2302 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2303 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2304 for (j=0;j<temp_constraints;j++) { 2305 for (k=0;k<j+1;k++) { 2306 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2307 } 2308 } 2309 /* compute eigenvalues and eigenvectors of correlation matrix */ 2310 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2311 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2312 #if !defined(PETSC_USE_COMPLEX) 2313 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2314 #else 2315 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2316 #endif 2317 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2318 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2319 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2320 j = 0; 2321 while (j < temp_constraints && singular_vals[j] < tol) j++; 2322 total_counts = total_counts-j; 2323 valid_constraints = temp_constraints-j; 2324 /* scale and copy POD basis into used quadrature memory */ 2325 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2326 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2327 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2331 if (j<temp_constraints) { 2332 PetscInt ii; 2333 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2334 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2335 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2336 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2337 for (k=0;k<temp_constraints-j;k++) { 2338 for (ii=0;ii<size_of_constraint;ii++) { 2339 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2340 } 2341 } 2342 } 2343 #else /* on missing GESVD */ 2344 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2345 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2346 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2347 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2348 #if !defined(PETSC_USE_COMPLEX) 2349 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2350 #else 2351 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2352 #endif 2353 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2354 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2355 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2356 k = temp_constraints; 2357 if (k > size_of_constraint) k = size_of_constraint; 2358 j = 0; 2359 while (j < k && singular_vals[k-j-1] < tol) j++; 2360 valid_constraints = k-j; 2361 total_counts = total_counts-temp_constraints+valid_constraints; 2362 #endif /* on missing GESVD */ 2363 } 2364 } 2365 /* update pointers information */ 2366 if (valid_constraints) { 2367 constraints_n[total_counts_cc] = valid_constraints; 2368 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2369 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2370 /* set change_of_basis flag */ 2371 if (boolforchange) { 2372 PetscBTSet(change_basis,total_counts_cc); 2373 } 2374 total_counts_cc++; 2375 } 2376 } 2377 /* free workspace */ 2378 if (!skip_lapack) { 2379 ierr = PetscFree(work);CHKERRQ(ierr); 2380 #if defined(PETSC_USE_COMPLEX) 2381 ierr = PetscFree(rwork);CHKERRQ(ierr); 2382 #endif 2383 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2384 #if defined(PETSC_MISSING_LAPACK_GESVD) 2385 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2386 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2387 #endif 2388 } 2389 for (k=0;k<nnsp_size;k++) { 2390 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2391 } 2392 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2393 /* free index sets of faces, edges and vertices */ 2394 for (i=0;i<n_ISForFaces;i++) { 2395 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2396 } 2397 if (n_ISForFaces) { 2398 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2399 } 2400 for (i=0;i<n_ISForEdges;i++) { 2401 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2402 } 2403 if (n_ISForEdges) { 2404 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2405 } 2406 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2407 } else { 2408 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2409 2410 total_counts = 0; 2411 n_vertices = 0; 2412 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2413 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2414 } 2415 max_constraints = 0; 2416 total_counts_cc = 0; 2417 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2418 total_counts += pcbddc->adaptive_constraints_n[i]; 2419 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2420 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2421 } 2422 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2423 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2424 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2425 constraints_data = pcbddc->adaptive_constraints_data; 2426 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2427 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2428 total_counts_cc = 0; 2429 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2430 if (pcbddc->adaptive_constraints_n[i]) { 2431 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2432 } 2433 } 2434 #if 0 2435 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2436 for (i=0;i<total_counts_cc;i++) { 2437 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2438 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2439 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2440 printf(" %d",constraints_idxs[j]); 2441 } 2442 printf("\n"); 2443 printf("number of cc: %d\n",constraints_n[i]); 2444 } 2445 for (i=0;i<n_vertices;i++) { 2446 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2447 } 2448 for (i=0;i<sub_schurs->n_subs;i++) { 2449 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2450 } 2451 #endif 2452 2453 max_size_of_constraint = 0; 2454 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2455 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2456 /* Change of basis */ 2457 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2458 if (pcbddc->use_change_of_basis) { 2459 for (i=0;i<sub_schurs->n_subs;i++) { 2460 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2461 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2462 } 2463 } 2464 } 2465 } 2466 pcbddc->local_primal_size = total_counts; 2467 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2468 2469 /* map constraints_idxs in boundary numbering */ 2470 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2471 if (i != constraints_idxs_ptr[total_counts_cc]) { 2472 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2473 } 2474 2475 /* Create constraint matrix */ 2476 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2477 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2478 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2479 2480 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2481 /* determine if a QR strategy is needed for change of basis */ 2482 qr_needed = PETSC_FALSE; 2483 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2484 total_primal_vertices=0; 2485 pcbddc->local_primal_size_cc = 0; 2486 for (i=0;i<total_counts_cc;i++) { 2487 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2488 if (size_of_constraint == 1) { 2489 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2490 pcbddc->local_primal_size_cc += 1; 2491 } else if (PetscBTLookup(change_basis,i)) { 2492 for (k=0;k<constraints_n[i];k++) { 2493 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2494 } 2495 pcbddc->local_primal_size_cc += constraints_n[i]; 2496 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2497 PetscBTSet(qr_needed_idx,i); 2498 qr_needed = PETSC_TRUE; 2499 } 2500 } else { 2501 pcbddc->local_primal_size_cc += 1; 2502 } 2503 } 2504 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2505 pcbddc->n_vertices = total_primal_vertices; 2506 /* permute indices in order to have a sorted set of vertices */ 2507 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2508 2509 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2510 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2511 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2512 2513 /* nonzero structure of constraint matrix */ 2514 /* and get reference dof for local constraints */ 2515 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2516 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2517 2518 j = total_primal_vertices; 2519 total_counts = total_primal_vertices; 2520 cum = total_primal_vertices; 2521 for (i=n_vertices;i<total_counts_cc;i++) { 2522 if (!PetscBTLookup(change_basis,i)) { 2523 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2524 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2525 cum++; 2526 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2527 for (k=0;k<constraints_n[i];k++) { 2528 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2529 nnz[j+k] = size_of_constraint; 2530 } 2531 j += constraints_n[i]; 2532 } 2533 } 2534 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2535 ierr = PetscFree(nnz);CHKERRQ(ierr); 2536 2537 /* set values in constraint matrix */ 2538 for (i=0;i<total_primal_vertices;i++) { 2539 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2540 } 2541 total_counts = total_primal_vertices; 2542 for (i=n_vertices;i<total_counts_cc;i++) { 2543 if (!PetscBTLookup(change_basis,i)) { 2544 PetscInt *cols; 2545 2546 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2547 cols = constraints_idxs+constraints_idxs_ptr[i]; 2548 for (k=0;k<constraints_n[i];k++) { 2549 PetscInt row = total_counts+k; 2550 PetscScalar *vals; 2551 2552 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2553 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2554 } 2555 total_counts += constraints_n[i]; 2556 } 2557 } 2558 /* assembling */ 2559 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2560 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2561 2562 /* 2563 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2564 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2565 */ 2566 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2567 if (pcbddc->use_change_of_basis) { 2568 /* dual and primal dofs on a single cc */ 2569 PetscInt dual_dofs,primal_dofs; 2570 /* working stuff for GEQRF */ 2571 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2572 PetscBLASInt lqr_work; 2573 /* working stuff for UNGQR */ 2574 PetscScalar *gqr_work,lgqr_work_t; 2575 PetscBLASInt lgqr_work; 2576 /* working stuff for TRTRS */ 2577 PetscScalar *trs_rhs; 2578 PetscBLASInt Blas_NRHS; 2579 /* pointers for values insertion into change of basis matrix */ 2580 PetscInt *start_rows,*start_cols; 2581 PetscScalar *start_vals; 2582 /* working stuff for values insertion */ 2583 PetscBT is_primal; 2584 PetscInt *aux_primal_numbering_B; 2585 /* matrix sizes */ 2586 PetscInt global_size,local_size; 2587 /* temporary change of basis */ 2588 Mat localChangeOfBasisMatrix; 2589 /* extra space for debugging */ 2590 PetscScalar *dbg_work; 2591 2592 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2593 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2594 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2595 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2596 /* nonzeros for local mat */ 2597 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2598 for (i=0;i<pcis->n;i++) nnz[i]=1; 2599 for (i=n_vertices;i<total_counts_cc;i++) { 2600 if (PetscBTLookup(change_basis,i)) { 2601 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2602 if (PetscBTLookup(qr_needed_idx,i)) { 2603 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2604 } else { 2605 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2606 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2607 } 2608 } 2609 } 2610 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2611 ierr = PetscFree(nnz);CHKERRQ(ierr); 2612 /* Set initial identity in the matrix */ 2613 for (i=0;i<pcis->n;i++) { 2614 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2615 } 2616 2617 if (pcbddc->dbg_flag) { 2618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2620 } 2621 2622 2623 /* Now we loop on the constraints which need a change of basis */ 2624 /* 2625 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2626 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2627 2628 Basic blocks of change of basis matrix T computed by 2629 2630 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2631 2632 | 1 0 ... 0 s_1/S | 2633 | 0 1 ... 0 s_2/S | 2634 | ... | 2635 | 0 ... 1 s_{n-1}/S | 2636 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2637 2638 with S = \sum_{i=1}^n s_i^2 2639 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2640 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2641 2642 - QR decomposition of constraints otherwise 2643 */ 2644 if (qr_needed) { 2645 /* space to store Q */ 2646 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2647 /* first we issue queries for optimal work */ 2648 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2649 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2650 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2651 lqr_work = -1; 2652 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2653 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2654 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2655 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2656 lgqr_work = -1; 2657 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2658 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2659 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2660 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2661 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2662 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2663 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2664 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2665 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2666 /* array to store scaling factors for reflectors */ 2667 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2668 /* array to store rhs and solution of triangular solver */ 2669 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2670 /* allocating workspace for check */ 2671 if (pcbddc->dbg_flag) { 2672 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2673 } 2674 } 2675 /* array to store whether a node is primal or not */ 2676 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2677 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2678 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2679 if (i != total_primal_vertices) { 2680 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2681 } 2682 for (i=0;i<total_primal_vertices;i++) { 2683 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2684 } 2685 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2686 2687 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2688 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2689 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2690 if (PetscBTLookup(change_basis,total_counts)) { 2691 /* get constraint info */ 2692 primal_dofs = constraints_n[total_counts]; 2693 dual_dofs = size_of_constraint-primal_dofs; 2694 2695 if (pcbddc->dbg_flag) { 2696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2697 } 2698 2699 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2700 2701 /* copy quadrature constraints for change of basis check */ 2702 if (pcbddc->dbg_flag) { 2703 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2704 } 2705 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2706 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2707 2708 /* compute QR decomposition of constraints */ 2709 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2711 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2712 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2713 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2714 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2715 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2716 2717 /* explictly compute R^-T */ 2718 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2719 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2720 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2721 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2724 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2725 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2726 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2727 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2728 2729 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2733 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2734 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2735 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2736 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2737 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2738 2739 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2740 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2741 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2742 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2743 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2744 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2745 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2747 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2748 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2749 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2750 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2751 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2752 2753 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2754 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2755 /* insert cols for primal dofs */ 2756 for (j=0;j<primal_dofs;j++) { 2757 start_vals = &qr_basis[j*size_of_constraint]; 2758 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2759 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2760 } 2761 /* insert cols for dual dofs */ 2762 for (j=0,k=0;j<dual_dofs;k++) { 2763 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2764 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2765 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2766 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2767 j++; 2768 } 2769 } 2770 2771 /* check change of basis */ 2772 if (pcbddc->dbg_flag) { 2773 PetscInt ii,jj; 2774 PetscBool valid_qr=PETSC_TRUE; 2775 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2776 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2777 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2778 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2780 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2781 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2782 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2783 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2784 for (jj=0;jj<size_of_constraint;jj++) { 2785 for (ii=0;ii<primal_dofs;ii++) { 2786 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2787 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2788 } 2789 } 2790 if (!valid_qr) { 2791 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2792 for (jj=0;jj<size_of_constraint;jj++) { 2793 for (ii=0;ii<primal_dofs;ii++) { 2794 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2795 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2796 } 2797 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2798 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2799 } 2800 } 2801 } 2802 } else { 2803 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2804 } 2805 } 2806 } else { /* simple transformation block */ 2807 PetscInt row,col; 2808 PetscScalar val,norm; 2809 2810 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2811 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2812 for (j=0;j<size_of_constraint;j++) { 2813 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2814 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2815 if (!PetscBTLookup(is_primal,row_B)) { 2816 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2817 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2818 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2819 } else { 2820 for (k=0;k<size_of_constraint;k++) { 2821 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2822 if (row != col) { 2823 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2824 } else { 2825 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2826 } 2827 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2828 } 2829 } 2830 } 2831 if (pcbddc->dbg_flag) { 2832 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2833 } 2834 } 2835 } else { 2836 if (pcbddc->dbg_flag) { 2837 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2838 } 2839 } 2840 } 2841 2842 /* free workspace */ 2843 if (qr_needed) { 2844 if (pcbddc->dbg_flag) { 2845 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2846 } 2847 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2848 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2849 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2850 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2851 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2852 } 2853 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2854 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2856 2857 /* assembling of global change of variable */ 2858 { 2859 Mat tmat; 2860 PetscInt bs; 2861 2862 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2863 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2864 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2865 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2866 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2867 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2868 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2869 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2870 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2871 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2872 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2873 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2874 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2875 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2876 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2877 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2878 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2879 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2880 } 2881 /* check */ 2882 if (pcbddc->dbg_flag) { 2883 PetscReal error; 2884 Vec x,x_change; 2885 2886 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2887 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2888 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2889 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2890 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2891 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2892 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2893 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2894 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2895 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2896 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2897 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2898 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2899 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2900 ierr = VecDestroy(&x);CHKERRQ(ierr); 2901 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2902 } 2903 2904 /* adapt sub_schurs computed (if any) */ 2905 if (pcbddc->use_deluxe_scaling) { 2906 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2907 if (sub_schurs->S_Ej_all) { 2908 Mat S_new,tmat; 2909 IS is_all_N; 2910 2911 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2912 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2913 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2914 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2915 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2916 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2917 sub_schurs->S_Ej_all = S_new; 2918 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2919 if (sub_schurs->sum_S_Ej_all) { 2920 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2921 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2922 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2923 sub_schurs->sum_S_Ej_all = S_new; 2924 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2925 } 2926 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2927 } 2928 } 2929 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2930 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2931 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2932 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2933 } 2934 2935 /* set up change of basis context */ 2936 if (pcbddc->ChangeOfBasisMatrix) { 2937 PCBDDCChange_ctx change_ctx; 2938 2939 if (!pcbddc->new_global_mat) { 2940 PetscInt global_size,local_size; 2941 2942 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2943 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2944 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2945 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2946 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2947 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2948 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2949 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2950 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2951 } else { 2952 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2953 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2954 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2955 } 2956 if (!pcbddc->user_ChangeOfBasisMatrix) { 2957 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2958 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2959 } else { 2960 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2961 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2962 } 2963 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2964 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2965 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2967 } 2968 2969 /* check if a new primal space has been introduced */ 2970 pcbddc->new_primal_space_local = PETSC_TRUE; 2971 if (olocal_primal_size == pcbddc->local_primal_size) { 2972 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2973 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2974 if (!pcbddc->new_primal_space_local) { 2975 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2976 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2977 } 2978 } 2979 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2980 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2981 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2982 2983 /* flush dbg viewer */ 2984 if (pcbddc->dbg_flag) { 2985 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2986 } 2987 2988 /* free workspace */ 2989 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2990 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2991 if (!pcbddc->adaptive_selection) { 2992 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2993 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2994 } else { 2995 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2996 pcbddc->adaptive_constraints_idxs_ptr, 2997 pcbddc->adaptive_constraints_data_ptr, 2998 pcbddc->adaptive_constraints_idxs, 2999 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3000 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3001 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3002 } 3003 PetscFunctionReturn(0); 3004 } 3005 3006 #undef __FUNCT__ 3007 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3008 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3009 { 3010 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3011 PC_IS *pcis = (PC_IS*)pc->data; 3012 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3013 PetscInt ierr,i,vertex_size,N; 3014 PetscViewer viewer=pcbddc->dbg_viewer; 3015 3016 PetscFunctionBegin; 3017 /* Reset previously computed graph */ 3018 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3019 /* Init local Graph struct */ 3020 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3021 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3022 3023 /* Check validity of the csr graph passed in by the user */ 3024 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3025 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3026 } 3027 3028 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3029 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3030 PetscInt *xadj,*adjncy; 3031 PetscInt nvtxs; 3032 PetscBool flg_row=PETSC_FALSE; 3033 3034 if (pcbddc->use_local_adj) { 3035 3036 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3037 if (flg_row) { 3038 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3039 pcbddc->computed_rowadj = PETSC_TRUE; 3040 } 3041 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3042 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3043 IS is_dummy; 3044 ISLocalToGlobalMapping l2gmap_dummy; 3045 PetscInt j,sum; 3046 PetscInt *cxadj,*cadjncy; 3047 const PetscInt *idxs; 3048 PCBDDCGraph graph; 3049 PetscBT is_on_boundary; 3050 3051 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3052 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3053 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3054 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3055 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3056 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3057 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3058 if (flg_row) { 3059 graph->xadj = xadj; 3060 graph->adjncy = adjncy; 3061 } 3062 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3063 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3064 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3065 3066 if (pcbddc->dbg_flag) { 3067 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3068 for (i=0;i<graph->ncc;i++) { 3069 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3070 } 3071 } 3072 3073 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3074 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3075 for (i=0;i<pcis->n_B;i++) { 3076 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3077 } 3078 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3079 3080 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3081 sum = 0; 3082 for (i=0;i<graph->ncc;i++) { 3083 PetscInt sizecc = 0; 3084 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3085 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3086 sizecc++; 3087 } 3088 } 3089 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3090 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3091 cxadj[graph->queue[j]] = sizecc; 3092 } 3093 } 3094 sum += sizecc*sizecc; 3095 } 3096 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3097 sum = 0; 3098 for (i=0;i<pcis->n;i++) { 3099 PetscInt temp = cxadj[i]; 3100 cxadj[i] = sum; 3101 sum += temp; 3102 } 3103 cxadj[pcis->n] = sum; 3104 for (i=0;i<graph->ncc;i++) { 3105 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3106 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3107 PetscInt k,sizecc = 0; 3108 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3109 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3110 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3111 sizecc++; 3112 } 3113 } 3114 } 3115 } 3116 } 3117 if (sum) { 3118 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3119 } else { 3120 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3121 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3122 } 3123 graph->xadj = 0; 3124 graph->adjncy = 0; 3125 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3126 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3127 } 3128 } 3129 if (pcbddc->dbg_flag) { 3130 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3131 } 3132 3133 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3134 vertex_size = 1; 3135 if (pcbddc->user_provided_isfordofs) { 3136 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3137 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3138 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3139 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3140 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3141 } 3142 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3143 pcbddc->n_ISForDofs = 0; 3144 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3145 } 3146 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3147 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3148 } else { 3149 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3150 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3151 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3152 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3153 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3154 } 3155 } 3156 } 3157 3158 /* Setup of Graph */ 3159 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3160 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3161 } 3162 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3163 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3164 } 3165 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3166 3167 /* Graph's connected components analysis */ 3168 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3169 3170 /* print some info to stdout */ 3171 if (pcbddc->dbg_flag) { 3172 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3173 } 3174 3175 /* mark topography has done */ 3176 pcbddc->recompute_topography = PETSC_FALSE; 3177 PetscFunctionReturn(0); 3178 } 3179 3180 #undef __FUNCT__ 3181 #define __FUNCT__ "PCBDDCSubsetNumbering" 3182 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3183 { 3184 Vec local_vec,global_vec; 3185 IS seqis,paris; 3186 VecScatter scatter_ctx; 3187 PetscScalar *array; 3188 PetscInt *temp_global_dofs; 3189 PetscScalar globalsum; 3190 PetscInt i,j,s; 3191 PetscInt nlocals,first_index,old_index,max_local,max_global; 3192 PetscMPIInt rank_prec_comm,size_prec_comm; 3193 PetscInt *dof_sizes,*dof_displs; 3194 PetscBool first_found; 3195 PetscErrorCode ierr; 3196 3197 PetscFunctionBegin; 3198 /* mpi buffers */ 3199 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3200 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3201 j = ( !rank_prec_comm ? size_prec_comm : 0); 3202 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3203 /* get maximum size of subset */ 3204 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3205 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3206 max_local = 0; 3207 for (i=0;i<n_local_dofs;i++) { 3208 if (max_local < temp_global_dofs[i] ) { 3209 max_local = temp_global_dofs[i]; 3210 } 3211 } 3212 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3213 max_global++; 3214 max_local = 0; 3215 for (i=0;i<n_local_dofs;i++) { 3216 if (max_local < local_dofs[i] ) { 3217 max_local = local_dofs[i]; 3218 } 3219 } 3220 max_local++; 3221 /* allocate workspace */ 3222 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3223 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3224 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3225 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3226 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3227 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3228 /* create scatter */ 3229 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3230 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3231 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3232 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3233 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3234 /* init array */ 3235 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3236 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3237 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3238 if (local_dofs_mult) { 3239 for (i=0;i<n_local_dofs;i++) { 3240 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3241 } 3242 } else { 3243 for (i=0;i<n_local_dofs;i++) { 3244 array[local_dofs[i]]=1.0; 3245 } 3246 } 3247 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3248 /* scatter into global vec and get total number of global dofs */ 3249 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3250 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3251 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3252 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3253 /* Fill global_vec with cumulative function for global numbering */ 3254 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3255 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3256 nlocals = 0; 3257 first_index = -1; 3258 first_found = PETSC_FALSE; 3259 for (i=0;i<s;i++) { 3260 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3261 first_found = PETSC_TRUE; 3262 first_index = i; 3263 } 3264 nlocals += (PetscInt)PetscRealPart(array[i]); 3265 } 3266 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3267 if (!rank_prec_comm) { 3268 dof_displs[0]=0; 3269 for (i=1;i<size_prec_comm;i++) { 3270 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3271 } 3272 } 3273 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3274 if (first_found) { 3275 array[first_index] += (PetscScalar)nlocals; 3276 old_index = first_index; 3277 for (i=first_index+1;i<s;i++) { 3278 if (PetscRealPart(array[i]) > 0.1) { 3279 array[i] += array[old_index]; 3280 old_index = i; 3281 } 3282 } 3283 } 3284 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3285 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3286 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3287 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3288 /* get global ordering of local dofs */ 3289 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3290 if (local_dofs_mult) { 3291 for (i=0;i<n_local_dofs;i++) { 3292 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3293 } 3294 } else { 3295 for (i=0;i<n_local_dofs;i++) { 3296 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3297 } 3298 } 3299 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3300 /* free workspace */ 3301 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3302 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3303 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3304 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3305 /* return pointer to global ordering of local dofs */ 3306 *global_numbering_subset = temp_global_dofs; 3307 PetscFunctionReturn(0); 3308 } 3309 3310 #undef __FUNCT__ 3311 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3312 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3313 { 3314 PetscInt i,j; 3315 PetscScalar *alphas; 3316 PetscErrorCode ierr; 3317 3318 PetscFunctionBegin; 3319 /* this implements stabilized Gram-Schmidt */ 3320 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3321 for (i=0;i<n;i++) { 3322 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3323 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3324 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3325 } 3326 ierr = PetscFree(alphas);CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 3330 #undef __FUNCT__ 3331 #define __FUNCT__ "MatISGetSubassemblingPattern" 3332 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3333 { 3334 Mat subdomain_adj; 3335 IS new_ranks,ranks_send_to; 3336 MatPartitioning partitioner; 3337 Mat_IS *matis; 3338 PetscInt n_neighs,*neighs,*n_shared,**shared; 3339 PetscInt prank; 3340 PetscMPIInt size,rank,color; 3341 PetscInt *xadj,*adjncy,*oldranks; 3342 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3343 PetscInt i,local_size,threshold=0; 3344 PetscErrorCode ierr; 3345 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3346 PetscSubcomm subcomm; 3347 3348 PetscFunctionBegin; 3349 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3350 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3351 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3352 3353 /* Get info on mapping */ 3354 matis = (Mat_IS*)(mat->data); 3355 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3356 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3357 3358 /* build local CSR graph of subdomains' connectivity */ 3359 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3360 xadj[0] = 0; 3361 xadj[1] = PetscMax(n_neighs-1,0); 3362 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3363 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3364 3365 if (threshold) { 3366 PetscInt xadj_count = 0; 3367 for (i=1;i<n_neighs;i++) { 3368 if (n_shared[i] > threshold) { 3369 adjncy[xadj_count] = neighs[i]; 3370 adjncy_wgt[xadj_count] = n_shared[i]; 3371 xadj_count++; 3372 } 3373 } 3374 xadj[1] = xadj_count; 3375 } else { 3376 if (xadj[1]) { 3377 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3378 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3379 } 3380 } 3381 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3382 if (use_square) { 3383 for (i=0;i<xadj[1];i++) { 3384 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3385 } 3386 } 3387 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3388 3389 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3390 3391 /* 3392 Restrict work on active processes only. 3393 */ 3394 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3395 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3396 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3397 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3398 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3399 if (color) { 3400 ierr = PetscFree(xadj);CHKERRQ(ierr); 3401 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3402 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3403 } else { 3404 PetscInt coarsening_ratio; 3405 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3406 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3407 prank = rank; 3408 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3409 /* 3410 for (i=0;i<size;i++) { 3411 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3412 } 3413 */ 3414 for (i=0;i<xadj[1];i++) { 3415 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3416 } 3417 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3418 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3419 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3420 3421 /* Partition */ 3422 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3423 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3424 if (use_vwgt) { 3425 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3426 v_wgt[0] = local_size; 3427 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3428 } 3429 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3430 coarsening_ratio = size/n_subdomains; 3431 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3432 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3433 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3434 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3435 3436 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3437 if (contiguous) { 3438 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3439 } else { 3440 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3441 } 3442 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3443 /* clean up */ 3444 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3445 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3446 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3447 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3448 } 3449 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3450 3451 /* assemble parallel IS for sends */ 3452 i = 1; 3453 if (color) i=0; 3454 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3455 3456 /* get back IS */ 3457 *is_sends = ranks_send_to; 3458 PetscFunctionReturn(0); 3459 } 3460 3461 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3462 3463 #undef __FUNCT__ 3464 #define __FUNCT__ "MatISSubassemble" 3465 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3466 { 3467 Mat local_mat; 3468 Mat_IS *matis; 3469 IS is_sends_internal; 3470 PetscInt rows,cols,new_local_rows; 3471 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3472 PetscBool ismatis,isdense,newisdense,destroy_mat; 3473 ISLocalToGlobalMapping l2gmap; 3474 PetscInt* l2gmap_indices; 3475 const PetscInt* is_indices; 3476 MatType new_local_type; 3477 /* buffers */ 3478 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3479 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3480 PetscInt *recv_buffer_idxs_local; 3481 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3482 /* MPI */ 3483 MPI_Comm comm,comm_n; 3484 PetscSubcomm subcomm; 3485 PetscMPIInt n_sends,n_recvs,commsize; 3486 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3487 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3488 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3489 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3490 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3491 PetscErrorCode ierr; 3492 3493 PetscFunctionBegin; 3494 /* TODO: add missing checks */ 3495 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3496 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3497 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3498 PetscValidLogicalCollectiveInt(mat,nis,7); 3499 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3500 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3501 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3502 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3503 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3504 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3505 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3506 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3507 PetscInt mrows,mcols,mnrows,mncols; 3508 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3509 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3510 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3511 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3512 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3513 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3514 } 3515 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3516 PetscValidLogicalCollectiveInt(mat,bs,0); 3517 /* prepare IS for sending if not provided */ 3518 if (!is_sends) { 3519 PetscBool pcontig = PETSC_TRUE; 3520 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3521 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3522 } else { 3523 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3524 is_sends_internal = is_sends; 3525 } 3526 3527 /* get pointer of MATIS data */ 3528 matis = (Mat_IS*)mat->data; 3529 3530 /* get comm */ 3531 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3532 3533 /* compute number of sends */ 3534 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3535 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3536 3537 /* compute number of receives */ 3538 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3539 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3540 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3541 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3542 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3543 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3544 ierr = PetscFree(iflags);CHKERRQ(ierr); 3545 3546 /* restrict comm if requested */ 3547 subcomm = 0; 3548 destroy_mat = PETSC_FALSE; 3549 if (restrict_comm) { 3550 PetscMPIInt color,subcommsize; 3551 3552 color = 0; 3553 if (restrict_full) { 3554 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3555 } else { 3556 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3557 } 3558 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3559 subcommsize = commsize - subcommsize; 3560 /* check if reuse has been requested */ 3561 if (reuse == MAT_REUSE_MATRIX) { 3562 if (*mat_n) { 3563 PetscMPIInt subcommsize2; 3564 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3565 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3566 comm_n = PetscObjectComm((PetscObject)*mat_n); 3567 } else { 3568 comm_n = PETSC_COMM_SELF; 3569 } 3570 } else { /* MAT_INITIAL_MATRIX */ 3571 PetscMPIInt rank; 3572 3573 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3574 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3575 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3576 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3577 comm_n = PetscSubcommChild(subcomm); 3578 } 3579 /* flag to destroy *mat_n if not significative */ 3580 if (color) destroy_mat = PETSC_TRUE; 3581 } else { 3582 comm_n = comm; 3583 } 3584 3585 /* prepare send/receive buffers */ 3586 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3587 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3588 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3589 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3590 if (nis) { 3591 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3592 } 3593 3594 /* Get data from local matrices */ 3595 if (!isdense) { 3596 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3597 /* TODO: See below some guidelines on how to prepare the local buffers */ 3598 /* 3599 send_buffer_vals should contain the raw values of the local matrix 3600 send_buffer_idxs should contain: 3601 - MatType_PRIVATE type 3602 - PetscInt size_of_l2gmap 3603 - PetscInt global_row_indices[size_of_l2gmap] 3604 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3605 */ 3606 } else { 3607 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3608 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3609 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3610 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3611 send_buffer_idxs[1] = i; 3612 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3613 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3614 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3615 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3616 for (i=0;i<n_sends;i++) { 3617 ilengths_vals[is_indices[i]] = len*len; 3618 ilengths_idxs[is_indices[i]] = len+2; 3619 } 3620 } 3621 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3622 /* additional is (if any) */ 3623 if (nis) { 3624 PetscMPIInt psum; 3625 PetscInt j; 3626 for (j=0,psum=0;j<nis;j++) { 3627 PetscInt plen; 3628 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3629 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3630 psum += len+1; /* indices + lenght */ 3631 } 3632 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3633 for (j=0,psum=0;j<nis;j++) { 3634 PetscInt plen; 3635 const PetscInt *is_array_idxs; 3636 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3637 send_buffer_idxs_is[psum] = plen; 3638 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3639 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3640 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3641 psum += plen+1; /* indices + lenght */ 3642 } 3643 for (i=0;i<n_sends;i++) { 3644 ilengths_idxs_is[is_indices[i]] = psum; 3645 } 3646 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3647 } 3648 3649 buf_size_idxs = 0; 3650 buf_size_vals = 0; 3651 buf_size_idxs_is = 0; 3652 for (i=0;i<n_recvs;i++) { 3653 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3654 buf_size_vals += (PetscInt)olengths_vals[i]; 3655 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3656 } 3657 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3658 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3659 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3660 3661 /* get new tags for clean communications */ 3662 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3663 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3664 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3665 3666 /* allocate for requests */ 3667 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3668 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3669 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3670 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3671 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3672 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3673 3674 /* communications */ 3675 ptr_idxs = recv_buffer_idxs; 3676 ptr_vals = recv_buffer_vals; 3677 ptr_idxs_is = recv_buffer_idxs_is; 3678 for (i=0;i<n_recvs;i++) { 3679 source_dest = onodes[i]; 3680 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3681 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3682 ptr_idxs += olengths_idxs[i]; 3683 ptr_vals += olengths_vals[i]; 3684 if (nis) { 3685 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3686 ptr_idxs_is += olengths_idxs_is[i]; 3687 } 3688 } 3689 for (i=0;i<n_sends;i++) { 3690 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3691 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3692 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3693 if (nis) { 3694 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3695 } 3696 } 3697 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3698 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3699 3700 /* assemble new l2g map */ 3701 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3702 ptr_idxs = recv_buffer_idxs; 3703 new_local_rows = 0; 3704 for (i=0;i<n_recvs;i++) { 3705 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3706 ptr_idxs += olengths_idxs[i]; 3707 } 3708 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3709 ptr_idxs = recv_buffer_idxs; 3710 new_local_rows = 0; 3711 for (i=0;i<n_recvs;i++) { 3712 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3713 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3714 ptr_idxs += olengths_idxs[i]; 3715 } 3716 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3717 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3718 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3719 3720 /* infer new local matrix type from received local matrices type */ 3721 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3722 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3723 if (n_recvs) { 3724 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3725 ptr_idxs = recv_buffer_idxs; 3726 for (i=0;i<n_recvs;i++) { 3727 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3728 new_local_type_private = MATAIJ_PRIVATE; 3729 break; 3730 } 3731 ptr_idxs += olengths_idxs[i]; 3732 } 3733 switch (new_local_type_private) { 3734 case MATDENSE_PRIVATE: 3735 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3736 new_local_type = MATSEQAIJ; 3737 bs = 1; 3738 } else { /* if I receive only 1 dense matrix */ 3739 new_local_type = MATSEQDENSE; 3740 bs = 1; 3741 } 3742 break; 3743 case MATAIJ_PRIVATE: 3744 new_local_type = MATSEQAIJ; 3745 bs = 1; 3746 break; 3747 case MATBAIJ_PRIVATE: 3748 new_local_type = MATSEQBAIJ; 3749 break; 3750 case MATSBAIJ_PRIVATE: 3751 new_local_type = MATSEQSBAIJ; 3752 break; 3753 default: 3754 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3755 break; 3756 } 3757 } else { /* by default, new_local_type is seqdense */ 3758 new_local_type = MATSEQDENSE; 3759 bs = 1; 3760 } 3761 3762 /* create MATIS object if needed */ 3763 if (reuse == MAT_INITIAL_MATRIX) { 3764 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3765 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3766 } else { 3767 /* it also destroys the local matrices */ 3768 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3769 } 3770 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3771 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3772 3773 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3774 3775 /* Global to local map of received indices */ 3776 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3777 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3778 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3779 3780 /* restore attributes -> type of incoming data and its size */ 3781 buf_size_idxs = 0; 3782 for (i=0;i<n_recvs;i++) { 3783 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3784 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3785 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3786 } 3787 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3788 3789 /* set preallocation */ 3790 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3791 if (!newisdense) { 3792 PetscInt *new_local_nnz=0; 3793 3794 ptr_vals = recv_buffer_vals; 3795 ptr_idxs = recv_buffer_idxs_local; 3796 if (n_recvs) { 3797 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3798 } 3799 for (i=0;i<n_recvs;i++) { 3800 PetscInt j; 3801 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3802 for (j=0;j<*(ptr_idxs+1);j++) { 3803 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3804 } 3805 } else { 3806 /* TODO */ 3807 } 3808 ptr_idxs += olengths_idxs[i]; 3809 } 3810 if (new_local_nnz) { 3811 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3812 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3813 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3814 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3815 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3816 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3817 } else { 3818 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3819 } 3820 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3821 } else { 3822 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3823 } 3824 3825 /* set values */ 3826 ptr_vals = recv_buffer_vals; 3827 ptr_idxs = recv_buffer_idxs_local; 3828 for (i=0;i<n_recvs;i++) { 3829 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3830 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3831 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3832 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3833 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3834 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3835 } else { 3836 /* TODO */ 3837 } 3838 ptr_idxs += olengths_idxs[i]; 3839 ptr_vals += olengths_vals[i]; 3840 } 3841 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3842 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3843 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3844 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3845 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3846 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3847 3848 #if 0 3849 if (!restrict_comm) { /* check */ 3850 Vec lvec,rvec; 3851 PetscReal infty_error; 3852 3853 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3854 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3855 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3856 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3857 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3858 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3859 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3860 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3861 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3862 } 3863 #endif 3864 3865 /* assemble new additional is (if any) */ 3866 if (nis) { 3867 PetscInt **temp_idxs,*count_is,j,psum; 3868 3869 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3870 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3871 ptr_idxs = recv_buffer_idxs_is; 3872 psum = 0; 3873 for (i=0;i<n_recvs;i++) { 3874 for (j=0;j<nis;j++) { 3875 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3876 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3877 psum += plen; 3878 ptr_idxs += plen+1; /* shift pointer to received data */ 3879 } 3880 } 3881 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3882 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3883 for (i=1;i<nis;i++) { 3884 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3885 } 3886 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3887 ptr_idxs = recv_buffer_idxs_is; 3888 for (i=0;i<n_recvs;i++) { 3889 for (j=0;j<nis;j++) { 3890 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3891 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3892 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3893 ptr_idxs += plen+1; /* shift pointer to received data */ 3894 } 3895 } 3896 for (i=0;i<nis;i++) { 3897 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3898 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3899 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3900 } 3901 ierr = PetscFree(count_is);CHKERRQ(ierr); 3902 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3903 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3904 } 3905 /* free workspace */ 3906 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3907 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3908 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3909 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3910 if (isdense) { 3911 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3912 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3913 } else { 3914 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3915 } 3916 if (nis) { 3917 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3918 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3919 } 3920 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3921 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3922 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3923 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3924 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3925 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3926 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3927 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3928 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3929 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3930 ierr = PetscFree(onodes);CHKERRQ(ierr); 3931 if (nis) { 3932 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3933 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3934 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3935 } 3936 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3937 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3938 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3939 for (i=0;i<nis;i++) { 3940 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3941 } 3942 *mat_n = NULL; 3943 } 3944 PetscFunctionReturn(0); 3945 } 3946 3947 /* temporary hack into ksp private data structure */ 3948 #include <petsc/private/kspimpl.h> 3949 3950 #undef __FUNCT__ 3951 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3952 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3953 { 3954 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3955 PC_IS *pcis = (PC_IS*)pc->data; 3956 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3957 MatNullSpace CoarseNullSpace=NULL; 3958 ISLocalToGlobalMapping coarse_islg; 3959 IS coarse_is,*isarray; 3960 PetscInt i,im_active=-1,active_procs=-1; 3961 PetscInt nis,nisdofs,nisneu; 3962 PC pc_temp; 3963 PCType coarse_pc_type; 3964 KSPType coarse_ksp_type; 3965 PetscBool multilevel_requested,multilevel_allowed; 3966 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3967 Mat t_coarse_mat_is; 3968 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3969 PetscMPIInt all_procs; 3970 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3971 PetscBool compute_vecs = PETSC_FALSE; 3972 PetscScalar *array; 3973 PetscErrorCode ierr; 3974 3975 PetscFunctionBegin; 3976 /* Assign global numbering to coarse dofs */ 3977 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3978 PetscInt ocoarse_size; 3979 compute_vecs = PETSC_TRUE; 3980 ocoarse_size = pcbddc->coarse_size; 3981 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3982 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3983 /* see if we can avoid some work */ 3984 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3985 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3986 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3987 coarse_reuse = PETSC_FALSE; 3988 } else { /* we can safely reuse already computed coarse matrix */ 3989 coarse_reuse = PETSC_TRUE; 3990 } 3991 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3992 coarse_reuse = PETSC_FALSE; 3993 } 3994 /* reset any subassembling information */ 3995 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3996 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3997 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3998 coarse_reuse = PETSC_TRUE; 3999 } 4000 4001 /* count "active" (i.e. with positive local size) and "void" processes */ 4002 im_active = !!(pcis->n); 4003 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4004 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4005 void_procs = all_procs-active_procs; 4006 csin_type_simple = PETSC_TRUE; 4007 redist = PETSC_FALSE; 4008 if (pcbddc->current_level && void_procs) { 4009 csin_ml = PETSC_TRUE; 4010 ncoarse_ml = void_procs; 4011 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4012 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4013 csin_ds = PETSC_TRUE; 4014 ncoarse_ds = pcbddc->redistribute_coarse; 4015 redist = PETSC_TRUE; 4016 } else { 4017 csin_ds = PETSC_TRUE; 4018 ncoarse_ds = active_procs; 4019 redist = PETSC_TRUE; 4020 } 4021 } else { 4022 csin_ml = PETSC_FALSE; 4023 ncoarse_ml = all_procs; 4024 if (void_procs) { 4025 csin_ds = PETSC_TRUE; 4026 ncoarse_ds = void_procs; 4027 csin_type_simple = PETSC_FALSE; 4028 } else { 4029 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4030 csin_ds = PETSC_TRUE; 4031 ncoarse_ds = pcbddc->redistribute_coarse; 4032 redist = PETSC_TRUE; 4033 } else { 4034 csin_ds = PETSC_FALSE; 4035 ncoarse_ds = all_procs; 4036 } 4037 } 4038 } 4039 4040 /* 4041 test if we can go multilevel: three conditions must be satisfied: 4042 - we have not exceeded the number of levels requested 4043 - we can actually subassemble the active processes 4044 - we can find a suitable number of MPI processes where we can place the subassembled problem 4045 */ 4046 multilevel_allowed = PETSC_FALSE; 4047 multilevel_requested = PETSC_FALSE; 4048 if (pcbddc->current_level < pcbddc->max_levels) { 4049 multilevel_requested = PETSC_TRUE; 4050 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4051 multilevel_allowed = PETSC_FALSE; 4052 } else { 4053 multilevel_allowed = PETSC_TRUE; 4054 } 4055 } 4056 /* determine number of process partecipating to coarse solver */ 4057 if (multilevel_allowed) { 4058 ncoarse = ncoarse_ml; 4059 csin = csin_ml; 4060 redist = PETSC_FALSE; 4061 } else { 4062 ncoarse = ncoarse_ds; 4063 csin = csin_ds; 4064 } 4065 4066 /* creates temporary l2gmap and IS for coarse indexes */ 4067 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4068 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4069 4070 /* creates temporary MATIS object for coarse matrix */ 4071 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4072 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4073 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4074 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4075 #if 0 4076 { 4077 PetscViewer viewer; 4078 char filename[256]; 4079 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4080 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4081 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4082 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4083 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4084 } 4085 #endif 4086 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4087 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4088 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4089 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4090 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4091 4092 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4093 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4094 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4095 const PetscInt *idxs; 4096 ISLocalToGlobalMapping tmap; 4097 4098 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4099 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4100 /* allocate space for temporary storage */ 4101 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4102 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4103 /* allocate for IS array */ 4104 nisdofs = pcbddc->n_ISForDofsLocal; 4105 nisneu = !!pcbddc->NeumannBoundariesLocal; 4106 nis = nisdofs + nisneu; 4107 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4108 /* dofs splitting */ 4109 for (i=0;i<nisdofs;i++) { 4110 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4111 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4112 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4113 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4114 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4115 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4116 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4117 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4118 } 4119 /* neumann boundaries */ 4120 if (pcbddc->NeumannBoundariesLocal) { 4121 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4122 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4123 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4124 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4125 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4126 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4127 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4128 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4129 } 4130 /* free memory */ 4131 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4132 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4133 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4134 } else { 4135 nis = 0; 4136 nisdofs = 0; 4137 nisneu = 0; 4138 isarray = NULL; 4139 } 4140 /* destroy no longer needed map */ 4141 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4142 4143 /* restrict on coarse candidates (if needed) */ 4144 coarse_mat_is = NULL; 4145 if (csin) { 4146 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4147 if (redist) { 4148 PetscMPIInt rank; 4149 PetscInt spc,n_spc_p1,dest[1],destsize; 4150 4151 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4152 spc = active_procs/ncoarse; 4153 n_spc_p1 = active_procs%ncoarse; 4154 if (im_active) { 4155 destsize = 1; 4156 if (rank > n_spc_p1*(spc+1)-1) { 4157 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4158 } else { 4159 dest[0] = rank/(spc+1); 4160 } 4161 } else { 4162 destsize = 0; 4163 } 4164 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4165 } else if (csin_type_simple) { 4166 PetscMPIInt rank; 4167 PetscInt issize,isidx; 4168 4169 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4170 if (im_active) { 4171 issize = 1; 4172 isidx = (PetscInt)rank; 4173 } else { 4174 issize = 0; 4175 isidx = -1; 4176 } 4177 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4178 } else { /* get a suitable subassembling pattern from MATIS code */ 4179 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4180 } 4181 4182 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4183 if (!redist || ncoarse <= void_procs) { 4184 PetscInt ncoarse_cand,tissize,*nisindices; 4185 PetscInt *coarse_candidates; 4186 const PetscInt* tisindices; 4187 4188 /* get coarse candidates' ranks in pc communicator */ 4189 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4190 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4191 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4192 if (!coarse_candidates[i]) { 4193 coarse_candidates[ncoarse_cand++]=i; 4194 } 4195 } 4196 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4197 4198 4199 if (pcbddc->dbg_flag) { 4200 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4201 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4202 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4203 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4204 for (i=0;i<ncoarse_cand;i++) { 4205 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4206 } 4207 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4208 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4209 } 4210 /* shift the pattern on coarse candidates */ 4211 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4212 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4213 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4214 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4215 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4216 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4217 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4218 } 4219 if (pcbddc->dbg_flag) { 4220 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4221 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4222 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4223 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4224 } 4225 } 4226 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4227 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4228 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4229 } else { /* this is the last level, so use just receiving processes in subcomm */ 4230 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4231 } 4232 } else { 4233 if (pcbddc->dbg_flag) { 4234 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4235 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4236 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4237 } 4238 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4239 coarse_mat_is = t_coarse_mat_is; 4240 } 4241 4242 /* create local to global scatters for coarse problem */ 4243 if (compute_vecs) { 4244 PetscInt lrows; 4245 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4246 if (coarse_mat_is) { 4247 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4248 } else { 4249 lrows = 0; 4250 } 4251 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4252 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4253 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4254 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4255 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4256 } 4257 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4258 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4259 4260 /* set defaults for coarse KSP and PC */ 4261 if (multilevel_allowed) { 4262 coarse_ksp_type = KSPRICHARDSON; 4263 coarse_pc_type = PCBDDC; 4264 } else { 4265 coarse_ksp_type = KSPPREONLY; 4266 coarse_pc_type = PCREDUNDANT; 4267 } 4268 4269 /* print some info if requested */ 4270 if (pcbddc->dbg_flag) { 4271 if (!multilevel_allowed) { 4272 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4273 if (multilevel_requested) { 4274 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4275 } else if (pcbddc->max_levels) { 4276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4277 } 4278 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4279 } 4280 } 4281 4282 /* create the coarse KSP object only once with defaults */ 4283 if (coarse_mat_is) { 4284 MatReuse coarse_mat_reuse; 4285 PetscViewer dbg_viewer = NULL; 4286 if (pcbddc->dbg_flag) { 4287 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4288 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4289 } 4290 if (!pcbddc->coarse_ksp) { 4291 char prefix[256],str_level[16]; 4292 size_t len; 4293 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4294 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4295 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4296 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4297 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4298 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4299 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4300 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4301 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4302 /* prefix */ 4303 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4304 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4305 if (!pcbddc->current_level) { 4306 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4307 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4308 } else { 4309 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4310 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4311 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4312 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4313 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4314 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4315 } 4316 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4317 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4318 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4319 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4320 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4321 /* allow user customization */ 4322 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4323 } 4324 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4325 if (nisdofs) { 4326 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4327 for (i=0;i<nisdofs;i++) { 4328 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4329 } 4330 } 4331 if (nisneu) { 4332 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4333 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4334 } 4335 4336 /* get some info after set from options */ 4337 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4338 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4339 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4340 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4341 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4342 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4343 isbddc = PETSC_FALSE; 4344 } 4345 if (isredundant) { 4346 KSP inner_ksp; 4347 PC inner_pc; 4348 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4349 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4350 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4351 } 4352 4353 /* assemble coarse matrix */ 4354 if (coarse_reuse) { 4355 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4356 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4357 coarse_mat_reuse = MAT_REUSE_MATRIX; 4358 } else { 4359 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4360 } 4361 if (isbddc || isnn) { 4362 if (pcbddc->coarsening_ratio > 1) { 4363 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4364 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4365 if (pcbddc->dbg_flag) { 4366 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4367 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4368 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4369 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4370 } 4371 } 4372 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4373 } else { 4374 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4375 coarse_mat = coarse_mat_is; 4376 } 4377 } else { 4378 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4379 } 4380 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4381 4382 /* propagate symmetry info of coarse matrix */ 4383 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4384 if (pc->pmat->symmetric_set) { 4385 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4386 } 4387 if (pc->pmat->hermitian_set) { 4388 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4389 } 4390 if (pc->pmat->spd_set) { 4391 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4392 } 4393 /* set operators */ 4394 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4395 if (pcbddc->dbg_flag) { 4396 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4397 } 4398 } else { /* processes non partecipating to coarse solver (if any) */ 4399 coarse_mat = 0; 4400 } 4401 ierr = PetscFree(isarray);CHKERRQ(ierr); 4402 #if 0 4403 { 4404 PetscViewer viewer; 4405 char filename[256]; 4406 sprintf(filename,"coarse_mat.m"); 4407 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4408 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4409 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4410 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4411 } 4412 #endif 4413 4414 /* Compute coarse null space (special handling by BDDC only) */ 4415 if (pcbddc->NullSpace) { 4416 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4417 } 4418 4419 if (pcbddc->coarse_ksp) { 4420 Vec crhs,csol; 4421 PetscBool ispreonly; 4422 if (CoarseNullSpace) { 4423 if (isbddc) { 4424 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4425 } else { 4426 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4427 } 4428 } 4429 /* setup coarse ksp */ 4430 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4431 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4432 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4433 /* hack */ 4434 if (!csol) { 4435 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4436 } 4437 if (!crhs) { 4438 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4439 } 4440 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4441 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4442 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4443 KSP check_ksp; 4444 KSPType check_ksp_type; 4445 PC check_pc; 4446 Vec check_vec,coarse_vec; 4447 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4448 PetscInt its; 4449 PetscBool compute_eigs; 4450 PetscReal *eigs_r,*eigs_c; 4451 PetscInt neigs; 4452 const char *prefix; 4453 4454 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4455 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4456 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4457 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4458 if (ispreonly) { 4459 check_ksp_type = KSPPREONLY; 4460 compute_eigs = PETSC_FALSE; 4461 } else { 4462 check_ksp_type = KSPGMRES; 4463 compute_eigs = PETSC_TRUE; 4464 } 4465 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4466 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4467 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4468 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4469 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4470 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4471 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4472 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4473 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4474 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4475 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4476 /* create random vec */ 4477 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4478 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4479 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4480 if (CoarseNullSpace) { 4481 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4482 } 4483 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4484 /* solve coarse problem */ 4485 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4486 if (CoarseNullSpace) { 4487 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4488 } 4489 /* set eigenvalue estimation if preonly has not been requested */ 4490 if (compute_eigs) { 4491 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4492 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4493 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4494 lambda_max = eigs_r[neigs-1]; 4495 lambda_min = eigs_r[0]; 4496 if (pcbddc->use_coarse_estimates) { 4497 if (lambda_max>lambda_min) { 4498 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4499 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4500 } 4501 } 4502 } 4503 4504 /* check coarse problem residual error */ 4505 if (pcbddc->dbg_flag) { 4506 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4507 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4508 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4509 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4510 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4511 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4512 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4513 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4514 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4515 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4516 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4517 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4518 if (compute_eigs) { 4519 PetscReal lambda_max_s,lambda_min_s; 4520 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4521 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4522 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4523 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4524 for (i=0;i<neigs;i++) { 4525 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4526 } 4527 } 4528 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4529 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4530 } 4531 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4532 if (compute_eigs) { 4533 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4534 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4535 } 4536 } 4537 } 4538 /* print additional info */ 4539 if (pcbddc->dbg_flag) { 4540 /* waits until all processes reaches this point */ 4541 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4542 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4543 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4544 } 4545 4546 /* free memory */ 4547 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4548 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4549 PetscFunctionReturn(0); 4550 } 4551 4552 #undef __FUNCT__ 4553 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4554 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4555 { 4556 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4557 PC_IS* pcis = (PC_IS*)pc->data; 4558 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4559 PetscInt i,local_size,coarse_size=0; 4560 PetscInt *local_primal_indices=NULL; 4561 PetscInt *t_local_primal_indices=NULL; 4562 PetscErrorCode ierr; 4563 4564 PetscFunctionBegin; 4565 /* Compute global number of coarse dofs */ 4566 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4567 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4568 } 4569 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4570 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4571 local_size = 0; 4572 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4573 PetscInt j; 4574 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4575 } 4576 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4577 if (local_size != pcbddc->local_primal_size) { 4578 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4579 } 4580 4581 /* check numbering */ 4582 if (pcbddc->dbg_flag) { 4583 PetscScalar coarsesum,*array; 4584 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4585 4586 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4587 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4589 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4590 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4591 for (i=0;i<pcbddc->local_primal_size;i++) { 4592 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4593 } 4594 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4595 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4596 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4597 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4598 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4599 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4600 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4601 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4602 for (i=0;i<pcis->n;i++) { 4603 if (array[i] == 1.0) { 4604 set_error = PETSC_TRUE; 4605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4606 } 4607 } 4608 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4609 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4610 for (i=0;i<pcis->n;i++) { 4611 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4612 } 4613 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4614 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4615 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4616 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4617 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4618 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4619 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4620 PetscInt *gidxs; 4621 4622 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4623 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4624 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4625 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4627 for (i=0;i<pcbddc->local_primal_size;i++) { 4628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4629 } 4630 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4631 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4632 } 4633 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4634 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4635 } 4636 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4637 /* get back data */ 4638 *coarse_size_n = coarse_size; 4639 *local_primal_indices_n = local_primal_indices; 4640 PetscFunctionReturn(0); 4641 } 4642 4643 #undef __FUNCT__ 4644 #define __FUNCT__ "PCBDDCGlobalToLocal" 4645 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4646 { 4647 IS localis_t; 4648 PetscInt i,lsize,*idxs,n; 4649 PetscScalar *vals; 4650 PetscErrorCode ierr; 4651 4652 PetscFunctionBegin; 4653 /* get indices in local ordering exploiting local to global map */ 4654 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4655 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4656 for (i=0;i<lsize;i++) vals[i] = 1.0; 4657 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4658 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4659 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4660 if (idxs) { /* multilevel guard */ 4661 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4662 } 4663 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4664 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4665 ierr = PetscFree(vals);CHKERRQ(ierr); 4666 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4667 /* now compute set in local ordering */ 4668 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4669 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4670 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4671 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4672 for (i=0,lsize=0;i<n;i++) { 4673 if (PetscRealPart(vals[i]) > 0.5) { 4674 lsize++; 4675 } 4676 } 4677 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4678 for (i=0,lsize=0;i<n;i++) { 4679 if (PetscRealPart(vals[i]) > 0.5) { 4680 idxs[lsize++] = i; 4681 } 4682 } 4683 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4684 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4685 *localis = localis_t; 4686 PetscFunctionReturn(0); 4687 } 4688 4689 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4690 #undef __FUNCT__ 4691 #define __FUNCT__ "PCBDDCMatMult_Private" 4692 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4693 { 4694 PCBDDCChange_ctx change_ctx; 4695 PetscErrorCode ierr; 4696 4697 PetscFunctionBegin; 4698 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4699 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4700 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4701 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4702 PetscFunctionReturn(0); 4703 } 4704 4705 #undef __FUNCT__ 4706 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4707 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4708 { 4709 PCBDDCChange_ctx change_ctx; 4710 PetscErrorCode ierr; 4711 4712 PetscFunctionBegin; 4713 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4714 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4715 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4716 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4717 PetscFunctionReturn(0); 4718 } 4719 4720 #undef __FUNCT__ 4721 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4722 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4723 { 4724 PC_IS *pcis=(PC_IS*)pc->data; 4725 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4726 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4727 Mat S_j; 4728 PetscInt *used_xadj,*used_adjncy; 4729 PetscBool free_used_adj; 4730 PetscErrorCode ierr; 4731 4732 PetscFunctionBegin; 4733 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4734 free_used_adj = PETSC_FALSE; 4735 if (pcbddc->sub_schurs_layers == -1) { 4736 used_xadj = NULL; 4737 used_adjncy = NULL; 4738 } else { 4739 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4740 used_xadj = pcbddc->mat_graph->xadj; 4741 used_adjncy = pcbddc->mat_graph->adjncy; 4742 } else if (pcbddc->computed_rowadj) { 4743 used_xadj = pcbddc->mat_graph->xadj; 4744 used_adjncy = pcbddc->mat_graph->adjncy; 4745 } else { 4746 PetscBool flg_row=PETSC_FALSE; 4747 const PetscInt *xadj,*adjncy; 4748 PetscInt nvtxs; 4749 4750 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4751 if (flg_row) { 4752 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4753 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4754 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4755 free_used_adj = PETSC_TRUE; 4756 } else { 4757 pcbddc->sub_schurs_layers = -1; 4758 used_xadj = NULL; 4759 used_adjncy = NULL; 4760 } 4761 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4762 } 4763 } 4764 4765 /* setup sub_schurs data */ 4766 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4767 if (!sub_schurs->use_mumps) { 4768 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4769 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4770 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4771 } else { 4772 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4773 if (!pcbddc->use_vertices && reuse_solvers) { 4774 PetscInt n_vertices; 4775 4776 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4777 reuse_solvers = (PetscBool)!n_vertices; 4778 } 4779 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4780 } 4781 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4782 4783 /* free adjacency */ 4784 if (free_used_adj) { 4785 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4786 } 4787 PetscFunctionReturn(0); 4788 } 4789 4790 #undef __FUNCT__ 4791 #define __FUNCT__ "PCBDDCInitSubSchurs" 4792 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4793 { 4794 PC_IS *pcis=(PC_IS*)pc->data; 4795 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4796 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4797 PCBDDCGraph graph; 4798 PetscErrorCode ierr; 4799 4800 PetscFunctionBegin; 4801 /* attach interface graph for determining subsets */ 4802 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4803 IS verticesIS,verticescomm; 4804 PetscInt vsize,*idxs; 4805 4806 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4807 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4808 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4809 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4810 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4811 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4812 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4813 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4814 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4815 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4816 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4817 /* 4818 if (pcbddc->dbg_flag) { 4819 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4820 } 4821 */ 4822 } else { 4823 graph = pcbddc->mat_graph; 4824 } 4825 4826 /* sub_schurs init */ 4827 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4828 4829 /* free graph struct */ 4830 if (pcbddc->sub_schurs_rebuild) { 4831 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4832 } 4833 PetscFunctionReturn(0); 4834 } 4835