1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 thresh = 1.0; 92 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 93 #if defined(PETSC_USE_COMPLEX) 94 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 95 #else 96 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 97 #endif 98 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 99 ierr = PetscFPTrapPop();CHKERRQ(ierr); 100 } else { 101 /* TODO */ 102 } 103 } else { 104 lwork = 0; 105 } 106 107 nv = 0; 108 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 109 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 110 } 111 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 112 if (allocated_S_St) { 113 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 114 } 115 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 116 #if defined(PETSC_USE_COMPLEX) 117 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 118 #endif 119 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 121 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 122 nv+cum,&pcbddc->adaptive_constraints_idxs, 123 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 124 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 125 126 maxneigs = 0; 127 cum = cum2 = cumarray = 0; 128 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 129 pcbddc->adaptive_constraints_data_ptr[0] = 0; 130 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 131 const PetscInt *idxs; 132 133 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 134 for (cum=0;cum<nv;cum++) { 135 pcbddc->adaptive_constraints_n[cum] = 1; 136 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 137 pcbddc->adaptive_constraints_data[cum] = 1.0; 138 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 139 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 140 } 141 cum2 = cum; 142 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 143 } 144 145 if (mss) { /* multilevel */ 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 147 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 148 } 149 150 for (i=0;i<sub_schurs->n_subs;i++) { 151 152 const PetscInt *idxs; 153 PetscReal infty = PETSC_MAX_REAL; 154 PetscInt j,subset_size,eigs_start = 0; 155 PetscBLASInt B_N; 156 PetscBool same_data = PETSC_FALSE; 157 158 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 159 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 160 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 161 if (sub_schurs->is_hermitian) { 162 PetscInt j,k; 163 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 164 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 for (j=0;j<subset_size;j++) { 168 for (k=j;k<subset_size;k++) { 169 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 170 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 171 } 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 183 /* see if we can save some work */ 184 if (sub_schurs->n_subs == 1) { 185 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 186 } 187 188 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 189 B_neigs = 0; 190 } else { 191 /* Threshold: this is an heuristic for edges */ 192 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 193 194 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 195 PetscBLASInt B_itype = 1; 196 PetscBLASInt B_IL, B_IU; 197 PetscReal eps = -1.0; /* dlamch? */ 198 PetscInt nmin_s; 199 200 /* ask for eigenvalues larger than thresh */ 201 if (pcbddc->dbg_flag) { 202 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 203 } 204 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 205 #if defined(PETSC_USE_COMPLEX) 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 207 #else 208 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 209 #endif 210 ierr = PetscFPTrapPop();CHKERRQ(ierr); 211 if (B_ierr) { 212 if (B_ierr < 0 ) { 213 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 214 } else if (B_ierr <= B_N) { 215 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 216 } else { 217 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 218 } 219 } 220 221 if (B_neigs > nmax) { 222 if (pcbddc->dbg_flag) { 223 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 224 } 225 eigs_start = B_neigs -nmax; 226 B_neigs = nmax; 227 } 228 229 nmin_s = PetscMin(nmin,B_N); 230 if (B_neigs < nmin_s) { 231 PetscBLASInt B_neigs2; 232 233 B_IU = B_N - B_neigs; 234 B_IL = B_N - nmin_s + 1; 235 if (pcbddc->dbg_flag) { 236 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 237 } 238 if (sub_schurs->is_hermitian) { 239 PetscInt j; 240 for (j=0;j<subset_size;j++) { 241 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 242 } 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 } else { 247 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 248 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 249 } 250 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 251 #if defined(PETSC_USE_COMPLEX) 252 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 253 #else 254 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 255 #endif 256 ierr = PetscFPTrapPop();CHKERRQ(ierr); 257 B_neigs += B_neigs2; 258 } 259 if (B_ierr) { 260 if (B_ierr < 0 ) { 261 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 262 } else if (B_ierr <= B_N) { 263 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 264 } else { 265 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 266 } 267 } 268 if (pcbddc->dbg_flag) { 269 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 270 for (j=0;j<B_neigs;j++) { 271 if (eigs[j] == 0.0) { 272 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 273 } else { 274 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 275 } 276 } 277 } 278 } else { 279 /* TODO */ 280 } 281 } 282 maxneigs = PetscMax(B_neigs,maxneigs); 283 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 284 if (B_neigs) { 285 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 286 287 if (pcbddc->dbg_flag > 1) { 288 PetscInt ii; 289 for (ii=0;ii<B_neigs;ii++) { 290 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 291 for (j=0;j<B_N;j++) { 292 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 293 } 294 } 295 } 296 #if 0 297 for (j=0;j<B_neigs;j++) { 298 PetscBLASInt Blas_N,Blas_one = 1.0; 299 PetscScalar norm; 300 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 301 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 302 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 303 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 304 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 305 } else { 306 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 307 } 308 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 309 } 310 #endif 311 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 312 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 313 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 314 cum++; 315 } 316 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 317 /* shift for next computation */ 318 cumarray += subset_size*subset_size; 319 } 320 if (pcbddc->dbg_flag) { 321 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 322 } 323 324 if (mss) { 325 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 326 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 327 /* destroy matrices (junk) */ 328 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 329 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 330 } 331 if (allocated_S_St) { 332 ierr = PetscFree2(S,St);CHKERRQ(ierr); 333 } 334 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 335 #if defined(PETSC_USE_COMPLEX) 336 ierr = PetscFree(rwork);CHKERRQ(ierr); 337 #endif 338 if (pcbddc->dbg_flag) { 339 PetscInt maxneigs_r; 340 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 341 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 342 } 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCSetUpSolvers" 348 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 349 { 350 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 351 PetscScalar *coarse_submat_vals; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 /* Setup local scatters R_to_B and (optionally) R_to_D */ 356 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 357 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 358 359 /* Setup local neumann solver ksp_R */ 360 /* PCBDDCSetUpLocalScatters should be called first! */ 361 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 362 363 /* Change global null space passed in by the user if change of basis has been requested */ 364 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 365 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 366 } 367 368 /* 369 Setup local correction and local part of coarse basis. 370 Gives back the dense local part of the coarse matrix in column major ordering 371 */ 372 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 373 374 /* Compute total number of coarse nodes and setup coarse solver */ 375 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 376 377 /* free */ 378 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "PCBDDCResetCustomization" 384 PetscErrorCode PCBDDCResetCustomization(PC pc) 385 { 386 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 387 PetscErrorCode ierr; 388 389 PetscFunctionBegin; 390 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 392 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 393 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 394 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 395 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 396 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 397 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 398 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 399 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 400 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "PCBDDCResetTopography" 406 PetscErrorCode PCBDDCResetTopography(PC pc) 407 { 408 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 409 PetscErrorCode ierr; 410 411 PetscFunctionBegin; 412 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 413 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 414 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 415 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 416 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "PCBDDCResetSolvers" 422 PetscErrorCode PCBDDCResetSolvers(PC pc) 423 { 424 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 425 PetscScalar *array; 426 PetscErrorCode ierr; 427 428 PetscFunctionBegin; 429 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 430 if (pcbddc->coarse_phi_B) { 431 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 432 ierr = PetscFree(array);CHKERRQ(ierr); 433 } 434 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 435 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 436 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 437 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 438 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 440 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 442 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 443 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 444 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 445 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 446 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 447 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 448 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 449 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 450 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 451 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 452 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 453 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 454 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 455 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 456 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 462 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 463 { 464 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 465 PC_IS *pcis = (PC_IS*)pc->data; 466 VecType impVecType; 467 PetscInt n_constraints,n_R,old_size; 468 PetscErrorCode ierr; 469 470 PetscFunctionBegin; 471 if (!pcbddc->ConstraintMatrix) { 472 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 473 } 474 /* get sizes */ 475 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 476 n_R = pcis->n-pcbddc->n_vertices; 477 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 478 /* local work vectors (try to avoid unneeded work)*/ 479 /* R nodes */ 480 old_size = -1; 481 if (pcbddc->vec1_R) { 482 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 483 } 484 if (n_R != old_size) { 485 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 486 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 487 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 488 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 489 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 490 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 491 } 492 /* local primal dofs */ 493 old_size = -1; 494 if (pcbddc->vec1_P) { 495 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 496 } 497 if (pcbddc->local_primal_size != old_size) { 498 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 499 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 500 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 501 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 502 } 503 /* local explicit constraints */ 504 old_size = -1; 505 if (pcbddc->vec1_C) { 506 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 507 } 508 if (n_constraints && n_constraints != old_size) { 509 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 510 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 511 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 512 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 513 } 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "PCBDDCSetUpCorrection" 519 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 520 { 521 PetscErrorCode ierr; 522 /* pointers to pcis and pcbddc */ 523 PC_IS* pcis = (PC_IS*)pc->data; 524 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 525 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 526 /* submatrices of local problem */ 527 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 528 /* submatrices of local coarse problem */ 529 Mat S_VV,S_CV,S_VC,S_CC; 530 /* working matrices */ 531 Mat C_CR; 532 /* additional working stuff */ 533 PC pc_R; 534 Mat F; 535 PetscBool isLU,isCHOL,isILU; 536 537 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 538 PetscScalar *work; 539 PetscInt *idx_V_B; 540 PetscInt n,n_vertices,n_constraints; 541 PetscInt i,n_R,n_D,n_B; 542 PetscBool unsymmetric_check; 543 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 544 MatType impMatType; 545 /* some shortcuts to scalars */ 546 PetscScalar one=1.0,m_one=-1.0; 547 548 PetscFunctionBegin; 549 n_vertices = pcbddc->n_vertices; 550 n_constraints = pcbddc->local_primal_size-n_vertices; 551 /* Set Non-overlapping dimensions */ 552 n_B = pcis->n_B; 553 n_D = pcis->n - n_B; 554 n_R = pcis->n - n_vertices; 555 556 /* Set types for local objects needed by BDDC precondtioner */ 557 impMatType = MATSEQDENSE; 558 559 /* vertices in boundary numbering */ 560 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 561 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 562 if (i != n_vertices) { 563 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 564 } 565 566 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 567 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 568 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 569 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 571 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 572 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 573 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 575 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 576 577 unsymmetric_check = PETSC_FALSE; 578 /* allocate workspace */ 579 n = 0; 580 if (n_constraints) { 581 n += n_R*n_constraints; 582 } 583 if (n_vertices) { 584 n = PetscMax(2*n_R*n_vertices,n); 585 n = PetscMax((n_R+n_B)*n_vertices,n); 586 } 587 if (!pcbddc->symmetric_primal) { 588 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 589 unsymmetric_check = PETSC_TRUE; 590 } 591 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 592 593 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 594 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 595 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 596 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 597 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 598 if (isLU || isILU || isCHOL) { 599 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 600 } else if (sub_schurs->reuse_mumps) { 601 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 602 MatFactorType type; 603 604 F = reuse_mumps->F; 605 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 606 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 607 } else { 608 F = NULL; 609 } 610 611 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 612 if (n_constraints) { 613 Mat M1,M2,M3; 614 Mat auxmat; 615 IS is_aux; 616 PetscScalar *array,*array2; 617 618 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 619 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 620 621 /* Extract constraints on R nodes: C_{CR} */ 622 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 623 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 624 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 625 626 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 627 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 628 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 629 for (i=0;i<n_constraints;i++) { 630 const PetscScalar *row_cmat_values; 631 const PetscInt *row_cmat_indices; 632 PetscInt size_of_constraint,j; 633 634 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 635 for (j=0;j<size_of_constraint;j++) { 636 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 637 } 638 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 639 } 640 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 641 if (F) { 642 Mat B; 643 644 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 645 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 646 ierr = MatDestroy(&B);CHKERRQ(ierr); 647 } else { 648 PetscScalar *marr; 649 650 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 651 for (i=0;i<n_constraints;i++) { 652 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 653 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 654 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 655 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 656 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 657 } 658 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 659 } 660 if (!pcbddc->switch_static) { 661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 662 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 663 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 664 for (i=0;i<n_constraints;i++) { 665 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 666 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 667 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 669 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 670 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 671 } 672 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 673 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 674 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 675 } else { 676 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 677 pcbddc->local_auxmat2 = local_auxmat2_R; 678 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 679 } 680 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 681 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 682 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 683 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 684 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 685 if (isCHOL) { 686 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 687 } else { 688 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 689 } 690 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 691 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 692 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 693 ierr = MatDestroy(&M2);CHKERRQ(ierr); 694 ierr = MatDestroy(&M3);CHKERRQ(ierr); 695 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 696 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 697 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 698 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 699 ierr = MatDestroy(&M1);CHKERRQ(ierr); 700 } 701 /* Get submatrices from subdomain matrix */ 702 if (n_vertices) { 703 IS is_aux; 704 705 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 706 IS tis; 707 708 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 709 ierr = ISSort(tis);CHKERRQ(ierr); 710 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 711 ierr = ISDestroy(&tis);CHKERRQ(ierr); 712 } else { 713 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 714 } 715 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 716 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 717 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 718 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 719 } 720 721 /* Matrix of coarse basis functions (local) */ 722 if (pcbddc->coarse_phi_B) { 723 PetscInt on_B,on_primal,on_D=n_D; 724 if (pcbddc->coarse_phi_D) { 725 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 726 } 727 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 728 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 729 PetscScalar *marray; 730 731 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 732 ierr = PetscFree(marray);CHKERRQ(ierr); 733 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 734 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 735 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 736 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 737 } 738 } 739 740 if (!pcbddc->coarse_phi_B) { 741 PetscScalar *marray; 742 743 n = n_B*pcbddc->local_primal_size; 744 if (pcbddc->switch_static || pcbddc->dbg_flag) { 745 n += n_D*pcbddc->local_primal_size; 746 } 747 if (!pcbddc->symmetric_primal) { 748 n *= 2; 749 } 750 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 752 n = n_B*pcbddc->local_primal_size; 753 if (pcbddc->switch_static || pcbddc->dbg_flag) { 754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 755 n += n_D*pcbddc->local_primal_size; 756 } 757 if (!pcbddc->symmetric_primal) { 758 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 760 n = n_B*pcbddc->local_primal_size; 761 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 762 } 763 } else { 764 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 765 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 766 if (pcbddc->switch_static || pcbddc->dbg_flag) { 767 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 768 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 769 } 770 } 771 } 772 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 773 /* vertices */ 774 if (n_vertices) { 775 776 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 777 778 if (n_R) { 779 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 780 PetscBLASInt B_N,B_one = 1; 781 PetscScalar *x,*y; 782 PetscBool isseqaij; 783 784 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 785 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 786 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 787 if (F) { /* TODO could be optimized for symmetric problems */ 788 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 789 } else { 790 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 791 for (i=0;i<n_vertices;i++) { 792 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 793 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 794 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 795 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 796 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 797 } 798 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 799 } 800 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 801 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 802 if (n_constraints) { 803 Mat B; 804 805 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 806 for (i=0;i<n_vertices;i++) { 807 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 808 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 809 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 810 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 811 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 812 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 813 } 814 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 815 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 816 ierr = MatDestroy(&B);CHKERRQ(ierr); 817 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 818 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 819 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 820 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 821 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 822 ierr = MatDestroy(&B);CHKERRQ(ierr); 823 } 824 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 825 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 826 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 827 } 828 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 829 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 830 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 831 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 832 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 833 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 834 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 835 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 836 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 837 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 838 } else { 839 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 840 } 841 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 842 /* coarse basis functions */ 843 for (i=0;i<n_vertices;i++) { 844 PetscScalar *y; 845 846 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 847 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 848 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 849 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 850 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 851 y[n_B*i+idx_V_B[i]] = 1.0; 852 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 853 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 854 855 if (pcbddc->switch_static || pcbddc->dbg_flag) { 856 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 857 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 858 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 861 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 862 } 863 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 864 } 865 /* if n_R == 0 the object is not destroyed */ 866 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 867 } 868 869 if (n_constraints) { 870 Mat B; 871 872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 873 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 874 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 875 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 876 if (n_vertices) { 877 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 878 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 879 } else { 880 Mat S_VCt; 881 882 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 883 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 884 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 885 } 886 } 887 ierr = MatDestroy(&B);CHKERRQ(ierr); 888 /* coarse basis functions */ 889 for (i=0;i<n_constraints;i++) { 890 PetscScalar *y; 891 892 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 893 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 894 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 895 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 898 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 899 if (pcbddc->switch_static || pcbddc->dbg_flag) { 900 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 901 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 902 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 905 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 906 } 907 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 908 } 909 } 910 if (n_constraints) { 911 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 912 } 913 914 /* compute other basis functions for non-symmetric problems */ 915 if (!pcbddc->symmetric_primal) { 916 917 if (n_constraints) { 918 Mat S_CCT,B_C; 919 920 /* this is a lazy thing */ 921 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 922 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 923 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 924 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 925 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 926 if (n_vertices) { 927 Mat B_V,S_VCT; 928 929 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 930 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 931 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 932 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 933 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 934 } 935 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 936 } else { /* if there are no constraints, reset work */ 937 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 938 } 939 if (n_vertices && n_R) { 940 Mat A_VRT; 941 PetscScalar *marray; 942 PetscBLASInt B_N,B_one = 1; 943 944 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 945 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 946 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 947 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 948 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 949 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 950 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 951 } 952 953 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 954 for (i=0;i<pcbddc->local_primal_size;i++) { 955 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 956 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 957 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 958 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 959 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 960 } 961 } else { 962 for (i=0;i<pcbddc->local_primal_size;i++) { 963 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 964 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 965 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 966 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 967 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 968 } 969 } 970 /* coarse basis functions */ 971 for (i=0;i<pcbddc->local_primal_size;i++) { 972 PetscScalar *y; 973 974 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 975 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 976 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 977 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 978 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 979 if (i<n_vertices) { 980 y[n_B*i+idx_V_B[i]] = 1.0; 981 } 982 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 983 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 984 985 if (pcbddc->switch_static || pcbddc->dbg_flag) { 986 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 987 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 988 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 991 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 992 } 993 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 994 } 995 } 996 /* free memory */ 997 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 998 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 999 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1000 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1001 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1002 ierr = PetscFree(work);CHKERRQ(ierr); 1003 if (n_vertices) { 1004 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1005 } 1006 if (n_constraints) { 1007 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1008 } 1009 /* Checking coarse_sub_mat and coarse basis functios */ 1010 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1011 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1012 if (pcbddc->dbg_flag) { 1013 Mat coarse_sub_mat; 1014 Mat AUXMAT,TM1,TM2,TM3,TM4; 1015 Mat coarse_phi_D,coarse_phi_B; 1016 Mat coarse_psi_D,coarse_psi_B; 1017 Mat A_II,A_BB,A_IB,A_BI; 1018 Mat C_B,CPHI; 1019 IS is_dummy; 1020 Vec mones; 1021 MatType checkmattype=MATSEQAIJ; 1022 PetscReal real_value; 1023 1024 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1025 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1026 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1027 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1028 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1029 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1030 if (unsymmetric_check) { 1031 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1032 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1033 } 1034 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1035 1036 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1037 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1038 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1039 if (unsymmetric_check) { 1040 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1041 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1042 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1043 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1044 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1045 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1046 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1047 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1048 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1049 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1050 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1051 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1052 } else { 1053 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1054 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 } 1062 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1063 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1064 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1065 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1066 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1067 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1068 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1069 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1070 1071 /* check constraints */ 1072 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1073 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1074 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1075 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1076 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1077 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1078 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1079 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1080 if (unsymmetric_check) { 1081 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1082 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1083 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1084 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1085 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1086 } 1087 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1088 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1089 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1090 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1091 1092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1093 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1094 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1095 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1096 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1097 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1098 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1099 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1100 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1101 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1102 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1103 if (unsymmetric_check) { 1104 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1105 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1106 } 1107 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1108 } 1109 /* get back data */ 1110 *coarse_submat_vals_n = coarse_submat_vals; 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1116 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1117 { 1118 Mat *work_mat; 1119 IS isrow_s,iscol_s; 1120 PetscBool rsorted,csorted; 1121 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1126 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1127 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1128 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1129 1130 if (!rsorted) { 1131 const PetscInt *idxs; 1132 PetscInt *idxs_sorted,i; 1133 1134 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1135 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1136 for (i=0;i<rsize;i++) { 1137 idxs_perm_r[i] = i; 1138 } 1139 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1140 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1141 for (i=0;i<rsize;i++) { 1142 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1143 } 1144 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1145 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1146 } else { 1147 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1148 isrow_s = isrow; 1149 } 1150 1151 if (!csorted) { 1152 if (isrow == iscol) { 1153 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1154 iscol_s = isrow_s; 1155 } else { 1156 const PetscInt *idxs; 1157 PetscInt *idxs_sorted,i; 1158 1159 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1160 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1161 for (i=0;i<csize;i++) { 1162 idxs_perm_c[i] = i; 1163 } 1164 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1165 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1166 for (i=0;i<csize;i++) { 1167 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1168 } 1169 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1170 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1171 } 1172 } else { 1173 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1174 iscol_s = iscol; 1175 } 1176 1177 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1178 1179 if (!rsorted || !csorted) { 1180 Mat new_mat; 1181 IS is_perm_r,is_perm_c; 1182 1183 if (!rsorted) { 1184 PetscInt *idxs_r,i; 1185 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1186 for (i=0;i<rsize;i++) { 1187 idxs_r[idxs_perm_r[i]] = i; 1188 } 1189 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1190 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1191 } else { 1192 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1193 } 1194 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1195 1196 if (!csorted) { 1197 if (isrow_s == iscol_s) { 1198 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1199 is_perm_c = is_perm_r; 1200 } else { 1201 PetscInt *idxs_c,i; 1202 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1203 for (i=0;i<csize;i++) { 1204 idxs_c[idxs_perm_c[i]] = i; 1205 } 1206 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1207 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1208 } 1209 } else { 1210 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1211 } 1212 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1213 1214 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1215 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1216 work_mat[0] = new_mat; 1217 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1218 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1219 } 1220 1221 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1222 *B = work_mat[0]; 1223 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1224 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1225 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1231 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1232 { 1233 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1234 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1235 Mat new_mat; 1236 IS is_local,is_global; 1237 PetscInt local_size; 1238 PetscBool isseqaij; 1239 PetscErrorCode ierr; 1240 1241 PetscFunctionBegin; 1242 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1243 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1244 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1245 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1246 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1247 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1248 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1249 1250 /* check */ 1251 if (pcbddc->dbg_flag) { 1252 Vec x,x_change; 1253 PetscReal error; 1254 1255 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1256 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1257 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1258 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1259 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1261 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1262 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1263 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1264 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1265 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1267 ierr = VecDestroy(&x);CHKERRQ(ierr); 1268 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1269 } 1270 1271 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1272 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1273 if (isseqaij) { 1274 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1275 } else { 1276 Mat work_mat; 1277 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1278 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1279 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1280 } 1281 if (matis->A->symmetric_set) { 1282 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1283 #if !defined(PETSC_USE_COMPLEX) 1284 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1285 #endif 1286 } 1287 /* 1288 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1289 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1290 */ 1291 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #undef __FUNCT__ 1296 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1297 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1298 { 1299 PC_IS* pcis = (PC_IS*)(pc->data); 1300 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1301 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1302 PetscInt *idx_R_local=NULL; 1303 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1304 PetscInt vbs,bs; 1305 PetscBT bitmask=NULL; 1306 PetscErrorCode ierr; 1307 1308 PetscFunctionBegin; 1309 /* 1310 No need to setup local scatters if 1311 - primal space is unchanged 1312 AND 1313 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1314 AND 1315 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1316 */ 1317 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1318 PetscFunctionReturn(0); 1319 } 1320 /* destroy old objects */ 1321 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1322 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1323 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1324 /* Set Non-overlapping dimensions */ 1325 n_B = pcis->n_B; 1326 n_D = pcis->n - n_B; 1327 n_vertices = pcbddc->n_vertices; 1328 1329 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1330 1331 /* create auxiliary bitmask and allocate workspace */ 1332 if (!sub_schurs->reuse_mumps) { 1333 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1334 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1335 for (i=0;i<n_vertices;i++) { 1336 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1337 } 1338 1339 for (i=0, n_R=0; i<pcis->n; i++) { 1340 if (!PetscBTLookup(bitmask,i)) { 1341 idx_R_local[n_R++] = i; 1342 } 1343 } 1344 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1345 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1346 1347 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1348 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1349 } 1350 1351 /* Block code */ 1352 vbs = 1; 1353 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1354 if (bs>1 && !(n_vertices%bs)) { 1355 PetscBool is_blocked = PETSC_TRUE; 1356 PetscInt *vary; 1357 if (!sub_schurs->reuse_mumps) { 1358 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1359 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1360 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1361 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1362 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1363 for (i=0; i<pcis->n/bs; i++) { 1364 if (vary[i]!=0 && vary[i]!=bs) { 1365 is_blocked = PETSC_FALSE; 1366 break; 1367 } 1368 } 1369 ierr = PetscFree(vary);CHKERRQ(ierr); 1370 } else { 1371 /* Verify directly the R set */ 1372 for (i=0; i<n_R/bs; i++) { 1373 PetscInt j,node=idx_R_local[bs*i]; 1374 for (j=1; j<bs; j++) { 1375 if (node != idx_R_local[bs*i+j]-j) { 1376 is_blocked = PETSC_FALSE; 1377 break; 1378 } 1379 } 1380 } 1381 } 1382 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1383 vbs = bs; 1384 for (i=0;i<n_R/vbs;i++) { 1385 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1386 } 1387 } 1388 } 1389 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1390 if (sub_schurs->reuse_mumps) { 1391 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1392 1393 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1394 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1395 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1396 reuse_mumps->is_R = pcbddc->is_R_local; 1397 } else { 1398 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1399 } 1400 1401 /* print some info if requested */ 1402 if (pcbddc->dbg_flag) { 1403 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1404 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1405 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1406 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1407 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1408 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1409 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1410 } 1411 1412 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1413 if (!sub_schurs->reuse_mumps) { 1414 IS is_aux1,is_aux2; 1415 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1416 1417 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1418 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1419 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1420 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1421 for (i=0; i<n_D; i++) { 1422 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1423 } 1424 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1425 for (i=0, j=0; i<n_R; i++) { 1426 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1427 aux_array1[j++] = i; 1428 } 1429 } 1430 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1431 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1432 for (i=0, j=0; i<n_B; i++) { 1433 if (!PetscBTLookup(bitmask,is_indices[i])) { 1434 aux_array2[j++] = i; 1435 } 1436 } 1437 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1438 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1439 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1440 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1441 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1442 1443 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1444 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1445 for (i=0, j=0; i<n_R; i++) { 1446 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1447 aux_array1[j++] = i; 1448 } 1449 } 1450 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1451 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1452 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1453 } 1454 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1455 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1456 } else { 1457 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1458 IS tis; 1459 PetscInt schur_size; 1460 1461 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1462 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1463 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1464 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1465 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1466 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1467 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1468 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1469 } 1470 } 1471 PetscFunctionReturn(0); 1472 } 1473 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1477 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1478 { 1479 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1480 PC_IS *pcis = (PC_IS*)pc->data; 1481 PC pc_temp; 1482 Mat A_RR; 1483 MatReuse reuse; 1484 PetscScalar m_one = -1.0; 1485 PetscReal value; 1486 PetscInt n_D,n_R; 1487 PetscBool use_exact,use_exact_reduced,issbaij; 1488 PetscErrorCode ierr; 1489 /* prefixes stuff */ 1490 char dir_prefix[256],neu_prefix[256],str_level[16]; 1491 size_t len; 1492 1493 PetscFunctionBegin; 1494 1495 /* compute prefixes */ 1496 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1497 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1498 if (!pcbddc->current_level) { 1499 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1500 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1501 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1502 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1503 } else { 1504 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1505 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1506 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1507 len -= 15; /* remove "pc_bddc_coarse_" */ 1508 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1509 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1510 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1511 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1512 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1513 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1514 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1515 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1516 } 1517 1518 /* DIRICHLET PROBLEM */ 1519 if (dirichlet) { 1520 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1521 if (pcbddc->local_mat->symmetric_set) { 1522 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1523 } 1524 /* Matrix for Dirichlet problem is pcis->A_II */ 1525 n_D = pcis->n - pcis->n_B; 1526 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1527 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1528 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1529 /* default */ 1530 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1531 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1532 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1533 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1534 if (issbaij) { 1535 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1536 } else { 1537 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1538 } 1539 /* Allow user's customization */ 1540 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1541 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1542 } 1543 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1544 if (sub_schurs->reuse_mumps) { 1545 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1546 1547 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1548 } 1549 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1550 if (!n_D) { 1551 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1552 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1553 } 1554 /* Set Up KSP for Dirichlet problem of BDDC */ 1555 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1556 /* set ksp_D into pcis data */ 1557 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1558 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1559 pcis->ksp_D = pcbddc->ksp_D; 1560 } 1561 1562 /* NEUMANN PROBLEM */ 1563 A_RR = 0; 1564 if (neumann) { 1565 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1566 PetscInt ibs,mbs; 1567 PetscBool issbaij; 1568 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1569 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1570 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1571 if (pcbddc->ksp_R) { /* already created ksp */ 1572 PetscInt nn_R; 1573 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1574 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1575 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1576 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1577 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1578 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1579 reuse = MAT_INITIAL_MATRIX; 1580 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1581 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1582 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1583 reuse = MAT_INITIAL_MATRIX; 1584 } else { /* safe to reuse the matrix */ 1585 reuse = MAT_REUSE_MATRIX; 1586 } 1587 } 1588 /* last check */ 1589 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1590 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1591 reuse = MAT_INITIAL_MATRIX; 1592 } 1593 } else { /* first time, so we need to create the matrix */ 1594 reuse = MAT_INITIAL_MATRIX; 1595 } 1596 /* extract A_RR */ 1597 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1598 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1599 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1600 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1601 if (matis->A == pcbddc->local_mat) { 1602 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1603 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1604 } else { 1605 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1606 } 1607 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1608 if (matis->A == pcbddc->local_mat) { 1609 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1610 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1611 } else { 1612 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1613 } 1614 } 1615 if (!sub_schurs->reuse_mumps) { 1616 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1617 if (pcbddc->local_mat->symmetric_set) { 1618 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1619 } 1620 } else { 1621 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1622 1623 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1624 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1625 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1626 } 1627 if (!pcbddc->ksp_R) { /* create object if not present */ 1628 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1629 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1630 /* default */ 1631 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1632 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1633 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1634 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1635 if (issbaij) { 1636 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1637 } else { 1638 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1639 } 1640 /* Allow user's customization */ 1641 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1642 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1643 } 1644 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1645 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1646 if (!n_R) { 1647 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1648 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1649 } 1650 /* Reuse MUMPS solver if it is present */ 1651 if (sub_schurs->reuse_mumps) { 1652 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1653 1654 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1655 } 1656 /* Set Up KSP for Neumann problem of BDDC */ 1657 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1658 } 1659 /* free Neumann problem's matrix */ 1660 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1661 1662 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1663 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1664 if (pcbddc->dbg_flag) { 1665 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1666 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1667 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1668 } 1669 if (dirichlet) { /* Dirichlet */ 1670 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1671 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1672 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1673 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1674 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1675 /* need to be adapted? */ 1676 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1677 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1678 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1679 /* print info */ 1680 if (pcbddc->dbg_flag) { 1681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1682 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1683 } 1684 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1685 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1686 } 1687 } 1688 if (neumann) { /* Neumann */ 1689 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1690 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1691 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1692 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1693 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1694 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1695 /* need to be adapted? */ 1696 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1697 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1698 /* print info */ 1699 if (pcbddc->dbg_flag) { 1700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1701 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1702 } 1703 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1704 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1705 } 1706 } 1707 } 1708 PetscFunctionReturn(0); 1709 } 1710 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1713 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1714 { 1715 PetscErrorCode ierr; 1716 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1717 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1718 1719 PetscFunctionBegin; 1720 if (!sub_schurs->reuse_mumps) { 1721 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1722 } 1723 if (!pcbddc->switch_static) { 1724 if (applytranspose && pcbddc->local_auxmat1) { 1725 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1726 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1727 } 1728 if (!sub_schurs->reuse_mumps) { 1729 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1730 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1731 } else { 1732 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1733 1734 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1735 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1736 } 1737 } else { 1738 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1739 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1740 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1741 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1742 if (applytranspose && pcbddc->local_auxmat1) { 1743 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1744 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1745 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1746 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1747 } 1748 } 1749 if (!sub_schurs->reuse_mumps) { 1750 if (applytranspose) { 1751 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1752 } else { 1753 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1754 } 1755 #if defined(PETSC_HAVE_MUMPS) 1756 } else { 1757 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1758 1759 if (applytranspose) { 1760 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1761 } else { 1762 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1763 } 1764 #endif 1765 } 1766 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1767 if (!pcbddc->switch_static) { 1768 if (!sub_schurs->reuse_mumps) { 1769 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1770 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1771 } else { 1772 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1773 1774 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1775 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1776 } 1777 if (!applytranspose && pcbddc->local_auxmat1) { 1778 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1779 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1780 } 1781 } else { 1782 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1783 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1786 if (!applytranspose && pcbddc->local_auxmat1) { 1787 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1788 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1789 } 1790 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1791 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1792 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 } 1795 PetscFunctionReturn(0); 1796 } 1797 1798 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1801 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1802 { 1803 PetscErrorCode ierr; 1804 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1805 PC_IS* pcis = (PC_IS*) (pc->data); 1806 const PetscScalar zero = 0.0; 1807 1808 PetscFunctionBegin; 1809 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1810 if (applytranspose) { 1811 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1812 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1813 } else { 1814 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1815 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1816 } 1817 /* start communications from local primal nodes to rhs of coarse solver */ 1818 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1819 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1820 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1821 1822 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1823 /* TODO remove null space when doing multilevel */ 1824 if (pcbddc->coarse_ksp) { 1825 Vec rhs,sol; 1826 1827 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1828 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1829 if (applytranspose) { 1830 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1831 } else { 1832 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1833 } 1834 } 1835 1836 /* Local solution on R nodes */ 1837 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1838 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1839 } 1840 1841 /* communications from coarse sol to local primal nodes */ 1842 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1843 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1844 1845 /* Sum contributions from two levels */ 1846 if (applytranspose) { 1847 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1848 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1849 } else { 1850 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1851 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1852 } 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1858 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1859 { 1860 PetscErrorCode ierr; 1861 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1862 PetscScalar *array; 1863 Vec from,to; 1864 1865 PetscFunctionBegin; 1866 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1867 from = pcbddc->coarse_vec; 1868 to = pcbddc->vec1_P; 1869 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1870 Vec tvec; 1871 1872 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1873 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1874 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1875 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1876 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1877 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1878 } 1879 } else { /* from local to global -> put data in coarse right hand side */ 1880 from = pcbddc->vec1_P; 1881 to = pcbddc->coarse_vec; 1882 } 1883 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1884 PetscFunctionReturn(0); 1885 } 1886 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1889 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1890 { 1891 PetscErrorCode ierr; 1892 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1893 PetscScalar *array; 1894 Vec from,to; 1895 1896 PetscFunctionBegin; 1897 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1898 from = pcbddc->coarse_vec; 1899 to = pcbddc->vec1_P; 1900 } else { /* from local to global -> put data in coarse right hand side */ 1901 from = pcbddc->vec1_P; 1902 to = pcbddc->coarse_vec; 1903 } 1904 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1905 if (smode == SCATTER_FORWARD) { 1906 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1907 Vec tvec; 1908 1909 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1910 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1911 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1912 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1913 } 1914 } else { 1915 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1916 ierr = VecResetArray(from);CHKERRQ(ierr); 1917 } 1918 } 1919 PetscFunctionReturn(0); 1920 } 1921 1922 /* uncomment for testing purposes */ 1923 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1926 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1927 { 1928 PetscErrorCode ierr; 1929 PC_IS* pcis = (PC_IS*)(pc->data); 1930 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1931 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1932 /* one and zero */ 1933 PetscScalar one=1.0,zero=0.0; 1934 /* space to store constraints and their local indices */ 1935 PetscScalar *constraints_data; 1936 PetscInt *constraints_idxs,*constraints_idxs_B; 1937 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1938 PetscInt *constraints_n; 1939 /* iterators */ 1940 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1941 /* BLAS integers */ 1942 PetscBLASInt lwork,lierr; 1943 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1944 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1945 /* reuse */ 1946 PetscInt olocal_primal_size,olocal_primal_size_cc; 1947 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1948 /* change of basis */ 1949 PetscBool qr_needed; 1950 PetscBT change_basis,qr_needed_idx; 1951 /* auxiliary stuff */ 1952 PetscInt *nnz,*is_indices; 1953 PetscInt ncc; 1954 /* some quantities */ 1955 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1956 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1957 1958 PetscFunctionBegin; 1959 /* Destroy Mat objects computed previously */ 1960 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1961 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1962 /* save info on constraints from previous setup (if any) */ 1963 olocal_primal_size = pcbddc->local_primal_size; 1964 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1965 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1966 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1967 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1968 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1969 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1970 1971 /* print some info */ 1972 if (pcbddc->dbg_flag) { 1973 IS vertices; 1974 PetscInt nv,nedges,nfaces; 1975 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1976 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1977 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1978 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1979 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1980 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1981 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1982 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1983 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1984 } 1985 1986 if (!pcbddc->adaptive_selection) { 1987 IS ISForVertices,*ISForFaces,*ISForEdges; 1988 MatNullSpace nearnullsp; 1989 const Vec *nearnullvecs; 1990 Vec *localnearnullsp; 1991 PetscScalar *array; 1992 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1993 PetscBool nnsp_has_cnst; 1994 /* LAPACK working arrays for SVD or POD */ 1995 PetscBool skip_lapack,boolforchange; 1996 PetscScalar *work; 1997 PetscReal *singular_vals; 1998 #if defined(PETSC_USE_COMPLEX) 1999 PetscReal *rwork; 2000 #endif 2001 #if defined(PETSC_MISSING_LAPACK_GESVD) 2002 PetscScalar *temp_basis,*correlation_mat; 2003 #else 2004 PetscBLASInt dummy_int=1; 2005 PetscScalar dummy_scalar=1.; 2006 #endif 2007 2008 /* Get index sets for faces, edges and vertices from graph */ 2009 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2010 /* free unneeded index sets */ 2011 if (!pcbddc->use_vertices) { 2012 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2013 } 2014 if (!pcbddc->use_edges) { 2015 for (i=0;i<n_ISForEdges;i++) { 2016 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2017 } 2018 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2019 n_ISForEdges = 0; 2020 } 2021 if (!pcbddc->use_faces) { 2022 for (i=0;i<n_ISForFaces;i++) { 2023 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2024 } 2025 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2026 n_ISForFaces = 0; 2027 } 2028 2029 #if defined(PETSC_USE_DEBUG) 2030 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2031 Also use_change_of_basis should be consistent among processors */ 2032 if (pcbddc->NullSpace) { 2033 PetscBool tbool[2],gbool[2]; 2034 2035 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2036 pcbddc->use_change_of_basis = PETSC_TRUE; 2037 if (!ISForEdges) { 2038 pcbddc->use_change_on_faces = PETSC_TRUE; 2039 } 2040 } 2041 tbool[0] = pcbddc->use_change_of_basis; 2042 tbool[1] = pcbddc->use_change_on_faces; 2043 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2044 pcbddc->use_change_of_basis = gbool[0]; 2045 pcbddc->use_change_on_faces = gbool[1]; 2046 } 2047 #endif 2048 2049 /* check if near null space is attached to global mat */ 2050 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2051 if (nearnullsp) { 2052 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2053 /* remove any stored info */ 2054 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2055 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2056 /* store information for BDDC solver reuse */ 2057 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2058 pcbddc->onearnullspace = nearnullsp; 2059 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2060 for (i=0;i<nnsp_size;i++) { 2061 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2062 } 2063 } else { /* if near null space is not provided BDDC uses constants by default */ 2064 nnsp_size = 0; 2065 nnsp_has_cnst = PETSC_TRUE; 2066 } 2067 /* get max number of constraints on a single cc */ 2068 max_constraints = nnsp_size; 2069 if (nnsp_has_cnst) max_constraints++; 2070 2071 /* 2072 Evaluate maximum storage size needed by the procedure 2073 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2074 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2075 There can be multiple constraints per connected component 2076 */ 2077 n_vertices = 0; 2078 if (ISForVertices) { 2079 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2080 } 2081 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2082 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2083 2084 total_counts = n_ISForFaces+n_ISForEdges; 2085 total_counts *= max_constraints; 2086 total_counts += n_vertices; 2087 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2088 2089 total_counts = 0; 2090 max_size_of_constraint = 0; 2091 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2092 IS used_is; 2093 if (i<n_ISForEdges) { 2094 used_is = ISForEdges[i]; 2095 } else { 2096 used_is = ISForFaces[i-n_ISForEdges]; 2097 } 2098 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2099 total_counts += j; 2100 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2101 } 2102 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2103 2104 /* get local part of global near null space vectors */ 2105 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2106 for (k=0;k<nnsp_size;k++) { 2107 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2108 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2109 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2110 } 2111 2112 /* whether or not to skip lapack calls */ 2113 skip_lapack = PETSC_TRUE; 2114 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2115 2116 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2117 if (!skip_lapack) { 2118 PetscScalar temp_work; 2119 2120 #if defined(PETSC_MISSING_LAPACK_GESVD) 2121 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2122 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2123 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2124 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2125 #if defined(PETSC_USE_COMPLEX) 2126 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2127 #endif 2128 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2129 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2130 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2131 lwork = -1; 2132 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2133 #if !defined(PETSC_USE_COMPLEX) 2134 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2135 #else 2136 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2137 #endif 2138 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2139 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2140 #else /* on missing GESVD */ 2141 /* SVD */ 2142 PetscInt max_n,min_n; 2143 max_n = max_size_of_constraint; 2144 min_n = max_constraints; 2145 if (max_size_of_constraint < max_constraints) { 2146 min_n = max_size_of_constraint; 2147 max_n = max_constraints; 2148 } 2149 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2150 #if defined(PETSC_USE_COMPLEX) 2151 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2152 #endif 2153 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2154 lwork = -1; 2155 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2156 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2157 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2158 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2159 #if !defined(PETSC_USE_COMPLEX) 2160 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2161 #else 2162 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2163 #endif 2164 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2165 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2166 #endif /* on missing GESVD */ 2167 /* Allocate optimal workspace */ 2168 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2169 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2170 } 2171 /* Now we can loop on constraining sets */ 2172 total_counts = 0; 2173 constraints_idxs_ptr[0] = 0; 2174 constraints_data_ptr[0] = 0; 2175 /* vertices */ 2176 if (n_vertices) { 2177 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2178 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2179 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2180 for (i=0;i<n_vertices;i++) { 2181 constraints_n[total_counts] = 1; 2182 constraints_data[total_counts] = 1.0; 2183 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2184 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2185 total_counts++; 2186 } 2187 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2188 PetscBool used_vertex; 2189 for (i=0;i<n_vertices;i++) { 2190 used_vertex = PETSC_FALSE; 2191 k = 0; 2192 while (!used_vertex && k<nnsp_size) { 2193 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2194 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2195 constraints_n[total_counts] = 1; 2196 constraints_idxs[total_counts] = is_indices[i]; 2197 constraints_data[total_counts] = 1.0; 2198 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2199 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2200 total_counts++; 2201 used_vertex = PETSC_TRUE; 2202 } 2203 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2204 k++; 2205 } 2206 } 2207 } 2208 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2209 n_vertices = total_counts; 2210 } 2211 2212 /* edges and faces */ 2213 total_counts_cc = total_counts; 2214 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2215 IS used_is; 2216 PetscBool idxs_copied = PETSC_FALSE; 2217 2218 if (ncc<n_ISForEdges) { 2219 used_is = ISForEdges[ncc]; 2220 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2221 } else { 2222 used_is = ISForFaces[ncc-n_ISForEdges]; 2223 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2224 } 2225 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2226 2227 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2228 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2229 /* change of basis should not be performed on local periodic nodes */ 2230 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2231 if (nnsp_has_cnst) { 2232 PetscScalar quad_value; 2233 2234 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2235 idxs_copied = PETSC_TRUE; 2236 2237 if (!pcbddc->use_nnsp_true) { 2238 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2239 } else { 2240 quad_value = 1.0; 2241 } 2242 for (j=0;j<size_of_constraint;j++) { 2243 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2244 } 2245 temp_constraints++; 2246 total_counts++; 2247 } 2248 for (k=0;k<nnsp_size;k++) { 2249 PetscReal real_value; 2250 PetscScalar *ptr_to_data; 2251 2252 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2253 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2254 for (j=0;j<size_of_constraint;j++) { 2255 ptr_to_data[j] = array[is_indices[j]]; 2256 } 2257 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2258 /* check if array is null on the connected component */ 2259 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2260 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2261 if (real_value > 0.0) { /* keep indices and values */ 2262 temp_constraints++; 2263 total_counts++; 2264 if (!idxs_copied) { 2265 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2266 idxs_copied = PETSC_TRUE; 2267 } 2268 } 2269 } 2270 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2271 valid_constraints = temp_constraints; 2272 if (!pcbddc->use_nnsp_true && temp_constraints) { 2273 if (temp_constraints == 1) { /* just normalize the constraint */ 2274 PetscScalar norm,*ptr_to_data; 2275 2276 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2277 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2278 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2279 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2280 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2281 } else { /* perform SVD */ 2282 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2283 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2284 2285 #if defined(PETSC_MISSING_LAPACK_GESVD) 2286 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2287 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2288 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2289 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2290 from that computed using LAPACKgesvd 2291 -> This is due to a different computation of eigenvectors in LAPACKheev 2292 -> The quality of the POD-computed basis will be the same */ 2293 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2294 /* Store upper triangular part of correlation matrix */ 2295 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2296 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2297 for (j=0;j<temp_constraints;j++) { 2298 for (k=0;k<j+1;k++) { 2299 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2300 } 2301 } 2302 /* compute eigenvalues and eigenvectors of correlation matrix */ 2303 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2304 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2305 #if !defined(PETSC_USE_COMPLEX) 2306 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2307 #else 2308 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2309 #endif 2310 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2311 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2312 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2313 j = 0; 2314 while (j < temp_constraints && singular_vals[j] < tol) j++; 2315 total_counts = total_counts-j; 2316 valid_constraints = temp_constraints-j; 2317 /* scale and copy POD basis into used quadrature memory */ 2318 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2319 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2320 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2321 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2322 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2323 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2324 if (j<temp_constraints) { 2325 PetscInt ii; 2326 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2327 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2328 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2329 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2330 for (k=0;k<temp_constraints-j;k++) { 2331 for (ii=0;ii<size_of_constraint;ii++) { 2332 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2333 } 2334 } 2335 } 2336 #else /* on missing GESVD */ 2337 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2338 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2339 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2340 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2341 #if !defined(PETSC_USE_COMPLEX) 2342 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2343 #else 2344 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2345 #endif 2346 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2347 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2348 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2349 k = temp_constraints; 2350 if (k > size_of_constraint) k = size_of_constraint; 2351 j = 0; 2352 while (j < k && singular_vals[k-j-1] < tol) j++; 2353 valid_constraints = k-j; 2354 total_counts = total_counts-temp_constraints+valid_constraints; 2355 #endif /* on missing GESVD */ 2356 } 2357 } 2358 /* update pointers information */ 2359 if (valid_constraints) { 2360 constraints_n[total_counts_cc] = valid_constraints; 2361 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2362 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2363 /* set change_of_basis flag */ 2364 if (boolforchange) { 2365 PetscBTSet(change_basis,total_counts_cc); 2366 } 2367 total_counts_cc++; 2368 } 2369 } 2370 /* free workspace */ 2371 if (!skip_lapack) { 2372 ierr = PetscFree(work);CHKERRQ(ierr); 2373 #if defined(PETSC_USE_COMPLEX) 2374 ierr = PetscFree(rwork);CHKERRQ(ierr); 2375 #endif 2376 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2377 #if defined(PETSC_MISSING_LAPACK_GESVD) 2378 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2379 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2380 #endif 2381 } 2382 for (k=0;k<nnsp_size;k++) { 2383 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2384 } 2385 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2386 /* free index sets of faces, edges and vertices */ 2387 for (i=0;i<n_ISForFaces;i++) { 2388 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2389 } 2390 if (n_ISForFaces) { 2391 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2392 } 2393 for (i=0;i<n_ISForEdges;i++) { 2394 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2395 } 2396 if (n_ISForEdges) { 2397 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2398 } 2399 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2400 } else { 2401 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2402 2403 total_counts = 0; 2404 n_vertices = 0; 2405 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2406 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2407 } 2408 max_constraints = 0; 2409 total_counts_cc = 0; 2410 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2411 total_counts += pcbddc->adaptive_constraints_n[i]; 2412 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2413 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2414 } 2415 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2416 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2417 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2418 constraints_data = pcbddc->adaptive_constraints_data; 2419 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2420 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2421 total_counts_cc = 0; 2422 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2423 if (pcbddc->adaptive_constraints_n[i]) { 2424 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2425 } 2426 } 2427 #if 0 2428 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2429 for (i=0;i<total_counts_cc;i++) { 2430 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2431 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2432 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2433 printf(" %d",constraints_idxs[j]); 2434 } 2435 printf("\n"); 2436 printf("number of cc: %d\n",constraints_n[i]); 2437 } 2438 for (i=0;i<n_vertices;i++) { 2439 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2440 } 2441 for (i=0;i<sub_schurs->n_subs;i++) { 2442 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2443 } 2444 #endif 2445 2446 max_size_of_constraint = 0; 2447 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2448 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2449 /* Change of basis */ 2450 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2451 if (pcbddc->use_change_of_basis) { 2452 for (i=0;i<sub_schurs->n_subs;i++) { 2453 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2454 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2455 } 2456 } 2457 } 2458 } 2459 pcbddc->local_primal_size = total_counts; 2460 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2461 2462 /* map constraints_idxs in boundary numbering */ 2463 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2464 if (i != constraints_idxs_ptr[total_counts_cc]) { 2465 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2466 } 2467 2468 /* Create constraint matrix */ 2469 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2470 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2471 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2472 2473 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2474 /* determine if a QR strategy is needed for change of basis */ 2475 qr_needed = PETSC_FALSE; 2476 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2477 total_primal_vertices=0; 2478 pcbddc->local_primal_size_cc = 0; 2479 for (i=0;i<total_counts_cc;i++) { 2480 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2481 if (size_of_constraint == 1) { 2482 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2483 pcbddc->local_primal_size_cc += 1; 2484 } else if (PetscBTLookup(change_basis,i)) { 2485 for (k=0;k<constraints_n[i];k++) { 2486 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2487 } 2488 pcbddc->local_primal_size_cc += constraints_n[i]; 2489 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2490 PetscBTSet(qr_needed_idx,i); 2491 qr_needed = PETSC_TRUE; 2492 } 2493 } else { 2494 pcbddc->local_primal_size_cc += 1; 2495 } 2496 } 2497 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2498 pcbddc->n_vertices = total_primal_vertices; 2499 /* permute indices in order to have a sorted set of vertices */ 2500 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2501 2502 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2503 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2504 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2505 2506 /* nonzero structure of constraint matrix */ 2507 /* and get reference dof for local constraints */ 2508 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2509 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2510 2511 j = total_primal_vertices; 2512 total_counts = total_primal_vertices; 2513 cum = total_primal_vertices; 2514 for (i=n_vertices;i<total_counts_cc;i++) { 2515 if (!PetscBTLookup(change_basis,i)) { 2516 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2517 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2518 cum++; 2519 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2520 for (k=0;k<constraints_n[i];k++) { 2521 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2522 nnz[j+k] = size_of_constraint; 2523 } 2524 j += constraints_n[i]; 2525 } 2526 } 2527 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2528 ierr = PetscFree(nnz);CHKERRQ(ierr); 2529 2530 /* set values in constraint matrix */ 2531 for (i=0;i<total_primal_vertices;i++) { 2532 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2533 } 2534 total_counts = total_primal_vertices; 2535 for (i=n_vertices;i<total_counts_cc;i++) { 2536 if (!PetscBTLookup(change_basis,i)) { 2537 PetscInt *cols; 2538 2539 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2540 cols = constraints_idxs+constraints_idxs_ptr[i]; 2541 for (k=0;k<constraints_n[i];k++) { 2542 PetscInt row = total_counts+k; 2543 PetscScalar *vals; 2544 2545 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2546 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2547 } 2548 total_counts += constraints_n[i]; 2549 } 2550 } 2551 /* assembling */ 2552 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2553 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2554 2555 /* 2556 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2557 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2558 */ 2559 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2560 if (pcbddc->use_change_of_basis) { 2561 /* dual and primal dofs on a single cc */ 2562 PetscInt dual_dofs,primal_dofs; 2563 /* working stuff for GEQRF */ 2564 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2565 PetscBLASInt lqr_work; 2566 /* working stuff for UNGQR */ 2567 PetscScalar *gqr_work,lgqr_work_t; 2568 PetscBLASInt lgqr_work; 2569 /* working stuff for TRTRS */ 2570 PetscScalar *trs_rhs; 2571 PetscBLASInt Blas_NRHS; 2572 /* pointers for values insertion into change of basis matrix */ 2573 PetscInt *start_rows,*start_cols; 2574 PetscScalar *start_vals; 2575 /* working stuff for values insertion */ 2576 PetscBT is_primal; 2577 PetscInt *aux_primal_numbering_B; 2578 /* matrix sizes */ 2579 PetscInt global_size,local_size; 2580 /* temporary change of basis */ 2581 Mat localChangeOfBasisMatrix; 2582 /* extra space for debugging */ 2583 PetscScalar *dbg_work; 2584 2585 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2586 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2587 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2588 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2589 /* nonzeros for local mat */ 2590 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2591 for (i=0;i<pcis->n;i++) nnz[i]=1; 2592 for (i=n_vertices;i<total_counts_cc;i++) { 2593 if (PetscBTLookup(change_basis,i)) { 2594 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2595 if (PetscBTLookup(qr_needed_idx,i)) { 2596 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2597 } else { 2598 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2599 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2600 } 2601 } 2602 } 2603 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2604 ierr = PetscFree(nnz);CHKERRQ(ierr); 2605 /* Set initial identity in the matrix */ 2606 for (i=0;i<pcis->n;i++) { 2607 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2608 } 2609 2610 if (pcbddc->dbg_flag) { 2611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2613 } 2614 2615 2616 /* Now we loop on the constraints which need a change of basis */ 2617 /* 2618 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2619 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2620 2621 Basic blocks of change of basis matrix T computed by 2622 2623 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2624 2625 | 1 0 ... 0 s_1/S | 2626 | 0 1 ... 0 s_2/S | 2627 | ... | 2628 | 0 ... 1 s_{n-1}/S | 2629 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2630 2631 with S = \sum_{i=1}^n s_i^2 2632 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2633 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2634 2635 - QR decomposition of constraints otherwise 2636 */ 2637 if (qr_needed) { 2638 /* space to store Q */ 2639 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2640 /* first we issue queries for optimal work */ 2641 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2642 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2643 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2644 lqr_work = -1; 2645 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2646 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2647 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2648 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2649 lgqr_work = -1; 2650 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2651 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2652 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2653 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2654 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2655 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2656 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2657 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2658 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2659 /* array to store scaling factors for reflectors */ 2660 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2661 /* array to store rhs and solution of triangular solver */ 2662 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2663 /* allocating workspace for check */ 2664 if (pcbddc->dbg_flag) { 2665 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2666 } 2667 } 2668 /* array to store whether a node is primal or not */ 2669 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2670 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2671 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2672 if (i != total_primal_vertices) { 2673 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2674 } 2675 for (i=0;i<total_primal_vertices;i++) { 2676 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2677 } 2678 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2679 2680 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2681 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2682 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2683 if (PetscBTLookup(change_basis,total_counts)) { 2684 /* get constraint info */ 2685 primal_dofs = constraints_n[total_counts]; 2686 dual_dofs = size_of_constraint-primal_dofs; 2687 2688 if (pcbddc->dbg_flag) { 2689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2690 } 2691 2692 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2693 2694 /* copy quadrature constraints for change of basis check */ 2695 if (pcbddc->dbg_flag) { 2696 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2697 } 2698 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2699 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2700 2701 /* compute QR decomposition of constraints */ 2702 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2703 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2704 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2705 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2706 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2707 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2708 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2709 2710 /* explictly compute R^-T */ 2711 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2712 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2713 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2714 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2715 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2716 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2717 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2718 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2719 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2720 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2721 2722 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2723 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2726 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2727 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2728 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2729 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2730 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2731 2732 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2733 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2734 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2735 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2736 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2737 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2738 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2739 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2740 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2741 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2742 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2743 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2744 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2745 2746 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2747 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2748 /* insert cols for primal dofs */ 2749 for (j=0;j<primal_dofs;j++) { 2750 start_vals = &qr_basis[j*size_of_constraint]; 2751 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2752 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2753 } 2754 /* insert cols for dual dofs */ 2755 for (j=0,k=0;j<dual_dofs;k++) { 2756 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2757 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2758 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2759 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2760 j++; 2761 } 2762 } 2763 2764 /* check change of basis */ 2765 if (pcbddc->dbg_flag) { 2766 PetscInt ii,jj; 2767 PetscBool valid_qr=PETSC_TRUE; 2768 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2769 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2770 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2771 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2772 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2773 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2774 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2775 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2776 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2777 for (jj=0;jj<size_of_constraint;jj++) { 2778 for (ii=0;ii<primal_dofs;ii++) { 2779 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2780 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2781 } 2782 } 2783 if (!valid_qr) { 2784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2785 for (jj=0;jj<size_of_constraint;jj++) { 2786 for (ii=0;ii<primal_dofs;ii++) { 2787 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2788 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2789 } 2790 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2791 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2792 } 2793 } 2794 } 2795 } else { 2796 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2797 } 2798 } 2799 } else { /* simple transformation block */ 2800 PetscInt row,col; 2801 PetscScalar val,norm; 2802 2803 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2804 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2805 for (j=0;j<size_of_constraint;j++) { 2806 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2807 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2808 if (!PetscBTLookup(is_primal,row_B)) { 2809 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2810 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2811 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2812 } else { 2813 for (k=0;k<size_of_constraint;k++) { 2814 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2815 if (row != col) { 2816 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2817 } else { 2818 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2819 } 2820 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2821 } 2822 } 2823 } 2824 if (pcbddc->dbg_flag) { 2825 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2826 } 2827 } 2828 } else { 2829 if (pcbddc->dbg_flag) { 2830 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2831 } 2832 } 2833 } 2834 2835 /* free workspace */ 2836 if (qr_needed) { 2837 if (pcbddc->dbg_flag) { 2838 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2839 } 2840 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2841 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2842 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2843 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2844 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2845 } 2846 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2847 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2848 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2849 2850 /* assembling of global change of variable */ 2851 { 2852 Mat tmat; 2853 PetscInt bs; 2854 2855 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2856 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2857 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2858 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2859 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2860 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2861 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2862 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2863 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2864 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2865 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2866 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2867 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2868 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2869 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2870 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2871 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2872 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2873 } 2874 /* check */ 2875 if (pcbddc->dbg_flag) { 2876 PetscReal error; 2877 Vec x,x_change; 2878 2879 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2880 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2881 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2882 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2883 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2884 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2885 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2886 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2887 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2888 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2889 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2890 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2891 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2892 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2893 ierr = VecDestroy(&x);CHKERRQ(ierr); 2894 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2895 } 2896 2897 /* adapt sub_schurs computed (if any) */ 2898 if (pcbddc->use_deluxe_scaling) { 2899 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2900 if (sub_schurs->S_Ej_all) { 2901 Mat S_new,tmat; 2902 IS is_all_N; 2903 2904 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2905 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2906 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2907 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2908 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2909 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2910 sub_schurs->S_Ej_all = S_new; 2911 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2912 if (sub_schurs->sum_S_Ej_all) { 2913 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2914 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2915 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2916 sub_schurs->sum_S_Ej_all = S_new; 2917 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2918 } 2919 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2920 } 2921 } 2922 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2923 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2924 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2925 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2926 } 2927 2928 /* set up change of basis context */ 2929 if (pcbddc->ChangeOfBasisMatrix) { 2930 PCBDDCChange_ctx change_ctx; 2931 2932 if (!pcbddc->new_global_mat) { 2933 PetscInt global_size,local_size; 2934 2935 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2936 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2937 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2938 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2939 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2940 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2941 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2942 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2943 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2944 } else { 2945 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2946 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2947 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2948 } 2949 if (!pcbddc->user_ChangeOfBasisMatrix) { 2950 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2951 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2952 } else { 2953 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2954 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2955 } 2956 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2957 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2958 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 } 2961 2962 /* check if a new primal space has been introduced */ 2963 pcbddc->new_primal_space_local = PETSC_TRUE; 2964 if (olocal_primal_size == pcbddc->local_primal_size) { 2965 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2966 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2967 if (!pcbddc->new_primal_space_local) { 2968 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2969 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2970 } 2971 } 2972 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2973 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2974 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2975 2976 /* flush dbg viewer */ 2977 if (pcbddc->dbg_flag) { 2978 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2979 } 2980 2981 /* free workspace */ 2982 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2983 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2984 if (!pcbddc->adaptive_selection) { 2985 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2986 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2987 } else { 2988 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2989 pcbddc->adaptive_constraints_idxs_ptr, 2990 pcbddc->adaptive_constraints_data_ptr, 2991 pcbddc->adaptive_constraints_idxs, 2992 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2993 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2994 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2995 } 2996 PetscFunctionReturn(0); 2997 } 2998 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3001 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3002 { 3003 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3004 PC_IS *pcis = (PC_IS*)pc->data; 3005 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3006 PetscInt ierr,i,vertex_size,N; 3007 PetscViewer viewer=pcbddc->dbg_viewer; 3008 3009 PetscFunctionBegin; 3010 /* Reset previously computed graph */ 3011 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3012 /* Init local Graph struct */ 3013 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3014 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3015 3016 /* Check validity of the csr graph passed in by the user */ 3017 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3018 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3019 } 3020 3021 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3022 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3023 PetscInt *xadj,*adjncy; 3024 PetscInt nvtxs; 3025 PetscBool flg_row=PETSC_FALSE; 3026 3027 if (pcbddc->use_local_adj) { 3028 3029 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3030 if (flg_row) { 3031 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3032 pcbddc->computed_rowadj = PETSC_TRUE; 3033 } 3034 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3035 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3036 IS is_dummy; 3037 ISLocalToGlobalMapping l2gmap_dummy; 3038 PetscInt j,sum; 3039 PetscInt *cxadj,*cadjncy; 3040 const PetscInt *idxs; 3041 PCBDDCGraph graph; 3042 PetscBT is_on_boundary; 3043 3044 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3045 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3046 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3047 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3048 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3049 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3050 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3051 if (flg_row) { 3052 graph->xadj = xadj; 3053 graph->adjncy = adjncy; 3054 } 3055 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3056 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3057 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3058 3059 if (pcbddc->dbg_flag) { 3060 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3061 for (i=0;i<graph->ncc;i++) { 3062 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3063 } 3064 } 3065 3066 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3067 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3068 for (i=0;i<pcis->n_B;i++) { 3069 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3070 } 3071 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3072 3073 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3074 sum = 0; 3075 for (i=0;i<graph->ncc;i++) { 3076 PetscInt sizecc = 0; 3077 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3078 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3079 sizecc++; 3080 } 3081 } 3082 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3083 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3084 cxadj[graph->queue[j]] = sizecc; 3085 } 3086 } 3087 sum += sizecc*sizecc; 3088 } 3089 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3090 sum = 0; 3091 for (i=0;i<pcis->n;i++) { 3092 PetscInt temp = cxadj[i]; 3093 cxadj[i] = sum; 3094 sum += temp; 3095 } 3096 cxadj[pcis->n] = sum; 3097 for (i=0;i<graph->ncc;i++) { 3098 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3099 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3100 PetscInt k,sizecc = 0; 3101 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3102 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3103 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3104 sizecc++; 3105 } 3106 } 3107 } 3108 } 3109 } 3110 if (sum) { 3111 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3112 } else { 3113 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3114 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3115 } 3116 graph->xadj = 0; 3117 graph->adjncy = 0; 3118 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3119 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3120 } 3121 } 3122 if (pcbddc->dbg_flag) { 3123 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3124 } 3125 3126 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3127 vertex_size = 1; 3128 if (pcbddc->user_provided_isfordofs) { 3129 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3130 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3131 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3132 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3133 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3134 } 3135 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3136 pcbddc->n_ISForDofs = 0; 3137 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3138 } 3139 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3140 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3141 } else { 3142 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3143 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3144 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3145 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3146 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3147 } 3148 } 3149 } 3150 3151 /* Setup of Graph */ 3152 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3153 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3154 } 3155 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3156 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3157 } 3158 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3159 3160 /* Graph's connected components analysis */ 3161 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3162 3163 /* print some info to stdout */ 3164 if (pcbddc->dbg_flag) { 3165 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3166 } 3167 3168 /* mark topography has done */ 3169 pcbddc->recompute_topography = PETSC_FALSE; 3170 PetscFunctionReturn(0); 3171 } 3172 3173 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3174 #undef __FUNCT__ 3175 #define __FUNCT__ "PCBDDCSubsetNumbering" 3176 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3177 { 3178 PetscSF sf; 3179 PetscLayout map; 3180 const PetscInt *idxs; 3181 PetscInt *leaf_data,*root_data,*gidxs; 3182 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3183 PetscInt n_n,nlocals,start,first_index; 3184 PetscMPIInt commsize; 3185 PetscBool first_found; 3186 PetscErrorCode ierr; 3187 3188 PetscFunctionBegin; 3189 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3190 if (subset_mult) { 3191 PetscCheckSameComm(subset,1,subset_mult,2); 3192 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3193 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3194 } 3195 /* create workspace layout for computing global indices of subset */ 3196 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3197 lbounds[0] = lbounds[1] = 0; 3198 for (i=0;i<n;i++) { 3199 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3200 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3201 } 3202 lbounds[0] = -lbounds[0]; 3203 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3204 gbounds[0] = -gbounds[0]; 3205 N = gbounds[1] - gbounds[0] + 1; 3206 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3207 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3208 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3209 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3210 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3211 3212 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3213 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3214 if (subset_mult) { 3215 const PetscInt* idxs_mult; 3216 3217 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3218 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3219 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3220 } else { 3221 for (i=0;i<n;i++) leaf_data[i] = 1; 3222 } 3223 /* local size of new subset */ 3224 n_n = 0; 3225 for (i=0;i<n;i++) n_n += leaf_data[i]; 3226 3227 /* global indexes in layout */ 3228 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3229 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3230 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3231 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3232 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3233 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3234 3235 /* reduce from leaves to roots */ 3236 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3237 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3238 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3239 3240 /* count indexes in local part of layout */ 3241 nlocals = 0; 3242 first_index = -1; 3243 first_found = PETSC_FALSE; 3244 for (i=0;i<Nl;i++) { 3245 if (!first_found && root_data[i]) { 3246 first_found = PETSC_TRUE; 3247 first_index = i; 3248 } 3249 nlocals += root_data[i]; 3250 } 3251 3252 /* cumulative of number of indexes and size of subset without holes */ 3253 start = 0; 3254 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3255 if (N_n) { /* compute total size of new subset if requested */ 3256 *N_n = start + nlocals; 3257 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3258 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3259 } 3260 /* adapt roof data with cumulative */ 3261 if (first_found) { 3262 PetscInt old_index; 3263 3264 root_data[first_index] += start; 3265 old_index = first_index; 3266 for (i=first_index+1;i<Nl;i++) { 3267 if (root_data[i]) { 3268 root_data[i] += root_data[old_index]; 3269 old_index = i; 3270 } 3271 } 3272 } 3273 3274 /* from roots to leaves */ 3275 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3276 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3277 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3278 3279 /* create new IS with global indexes without holes */ 3280 if (subset_mult) { 3281 const PetscInt* idxs_mult; 3282 PetscInt cum; 3283 3284 cum = 0; 3285 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3286 for (i=0;i<n;i++) { 3287 PetscInt j; 3288 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3289 } 3290 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3291 } else { 3292 for (i=0;i<n;i++) { 3293 gidxs[i] = leaf_data[i]-1; 3294 } 3295 } 3296 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3297 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3298 PetscFunctionReturn(0); 3299 } 3300 3301 #undef __FUNCT__ 3302 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3303 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3304 { 3305 PetscInt i,j; 3306 PetscScalar *alphas; 3307 PetscErrorCode ierr; 3308 3309 PetscFunctionBegin; 3310 /* this implements stabilized Gram-Schmidt */ 3311 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3312 for (i=0;i<n;i++) { 3313 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3314 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3315 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3316 } 3317 ierr = PetscFree(alphas);CHKERRQ(ierr); 3318 PetscFunctionReturn(0); 3319 } 3320 3321 #undef __FUNCT__ 3322 #define __FUNCT__ "MatISGetSubassemblingPattern" 3323 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3324 { 3325 Mat subdomain_adj; 3326 IS new_ranks,ranks_send_to; 3327 MatPartitioning partitioner; 3328 Mat_IS *matis; 3329 PetscInt n_neighs,*neighs,*n_shared,**shared; 3330 PetscInt prank; 3331 PetscMPIInt size,rank,color; 3332 PetscInt *xadj,*adjncy,*oldranks; 3333 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3334 PetscInt i,local_size,threshold=0; 3335 PetscErrorCode ierr; 3336 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3337 PetscSubcomm subcomm; 3338 3339 PetscFunctionBegin; 3340 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3341 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3342 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3343 3344 /* Get info on mapping */ 3345 matis = (Mat_IS*)(mat->data); 3346 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3347 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3348 3349 /* build local CSR graph of subdomains' connectivity */ 3350 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3351 xadj[0] = 0; 3352 xadj[1] = PetscMax(n_neighs-1,0); 3353 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3354 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3355 3356 if (threshold) { 3357 PetscInt xadj_count = 0; 3358 for (i=1;i<n_neighs;i++) { 3359 if (n_shared[i] > threshold) { 3360 adjncy[xadj_count] = neighs[i]; 3361 adjncy_wgt[xadj_count] = n_shared[i]; 3362 xadj_count++; 3363 } 3364 } 3365 xadj[1] = xadj_count; 3366 } else { 3367 if (xadj[1]) { 3368 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3369 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3370 } 3371 } 3372 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3373 if (use_square) { 3374 for (i=0;i<xadj[1];i++) { 3375 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3376 } 3377 } 3378 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3379 3380 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3381 3382 /* 3383 Restrict work on active processes only. 3384 */ 3385 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3386 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3387 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3388 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3389 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3390 if (color) { 3391 ierr = PetscFree(xadj);CHKERRQ(ierr); 3392 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3393 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3394 } else { 3395 PetscInt coarsening_ratio; 3396 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3397 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3398 prank = rank; 3399 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3400 /* 3401 for (i=0;i<size;i++) { 3402 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3403 } 3404 */ 3405 for (i=0;i<xadj[1];i++) { 3406 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3407 } 3408 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3409 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3410 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3411 3412 /* Partition */ 3413 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3414 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3415 if (use_vwgt) { 3416 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3417 v_wgt[0] = local_size; 3418 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3419 } 3420 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3421 coarsening_ratio = size/n_subdomains; 3422 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3423 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3424 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3425 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3426 3427 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3428 if (contiguous) { 3429 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3430 } else { 3431 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3432 } 3433 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3434 /* clean up */ 3435 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3436 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3437 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3438 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3439 } 3440 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3441 3442 /* assemble parallel IS for sends */ 3443 i = 1; 3444 if (color) i=0; 3445 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3446 3447 /* get back IS */ 3448 *is_sends = ranks_send_to; 3449 PetscFunctionReturn(0); 3450 } 3451 3452 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3453 3454 #undef __FUNCT__ 3455 #define __FUNCT__ "MatISSubassemble" 3456 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3457 { 3458 Mat local_mat; 3459 Mat_IS *matis; 3460 IS is_sends_internal; 3461 PetscInt rows,cols,new_local_rows; 3462 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3463 PetscBool ismatis,isdense,newisdense,destroy_mat; 3464 ISLocalToGlobalMapping l2gmap; 3465 PetscInt* l2gmap_indices; 3466 const PetscInt* is_indices; 3467 MatType new_local_type; 3468 /* buffers */ 3469 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3470 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3471 PetscInt *recv_buffer_idxs_local; 3472 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3473 /* MPI */ 3474 MPI_Comm comm,comm_n; 3475 PetscSubcomm subcomm; 3476 PetscMPIInt n_sends,n_recvs,commsize; 3477 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3478 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3479 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3480 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3481 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3482 PetscErrorCode ierr; 3483 3484 PetscFunctionBegin; 3485 /* TODO: add missing checks */ 3486 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3487 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3488 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3489 PetscValidLogicalCollectiveInt(mat,nis,7); 3490 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3491 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3492 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3493 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3494 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3495 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3496 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3497 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3498 PetscInt mrows,mcols,mnrows,mncols; 3499 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3500 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3501 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3502 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3503 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3504 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3505 } 3506 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3507 PetscValidLogicalCollectiveInt(mat,bs,0); 3508 /* prepare IS for sending if not provided */ 3509 if (!is_sends) { 3510 PetscBool pcontig = PETSC_TRUE; 3511 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3512 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3513 } else { 3514 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3515 is_sends_internal = is_sends; 3516 } 3517 3518 /* get pointer of MATIS data */ 3519 matis = (Mat_IS*)mat->data; 3520 3521 /* get comm */ 3522 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3523 3524 /* compute number of sends */ 3525 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3526 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3527 3528 /* compute number of receives */ 3529 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3530 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3531 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3532 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3533 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3534 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3535 ierr = PetscFree(iflags);CHKERRQ(ierr); 3536 3537 /* restrict comm if requested */ 3538 subcomm = 0; 3539 destroy_mat = PETSC_FALSE; 3540 if (restrict_comm) { 3541 PetscMPIInt color,subcommsize; 3542 3543 color = 0; 3544 if (restrict_full) { 3545 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3546 } else { 3547 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3548 } 3549 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3550 subcommsize = commsize - subcommsize; 3551 /* check if reuse has been requested */ 3552 if (reuse == MAT_REUSE_MATRIX) { 3553 if (*mat_n) { 3554 PetscMPIInt subcommsize2; 3555 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3556 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3557 comm_n = PetscObjectComm((PetscObject)*mat_n); 3558 } else { 3559 comm_n = PETSC_COMM_SELF; 3560 } 3561 } else { /* MAT_INITIAL_MATRIX */ 3562 PetscMPIInt rank; 3563 3564 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3565 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3566 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3567 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3568 comm_n = PetscSubcommChild(subcomm); 3569 } 3570 /* flag to destroy *mat_n if not significative */ 3571 if (color) destroy_mat = PETSC_TRUE; 3572 } else { 3573 comm_n = comm; 3574 } 3575 3576 /* prepare send/receive buffers */ 3577 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3578 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3579 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3580 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3581 if (nis) { 3582 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3583 } 3584 3585 /* Get data from local matrices */ 3586 if (!isdense) { 3587 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3588 /* TODO: See below some guidelines on how to prepare the local buffers */ 3589 /* 3590 send_buffer_vals should contain the raw values of the local matrix 3591 send_buffer_idxs should contain: 3592 - MatType_PRIVATE type 3593 - PetscInt size_of_l2gmap 3594 - PetscInt global_row_indices[size_of_l2gmap] 3595 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3596 */ 3597 } else { 3598 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3599 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3600 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3601 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3602 send_buffer_idxs[1] = i; 3603 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3604 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3605 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3606 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3607 for (i=0;i<n_sends;i++) { 3608 ilengths_vals[is_indices[i]] = len*len; 3609 ilengths_idxs[is_indices[i]] = len+2; 3610 } 3611 } 3612 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3613 /* additional is (if any) */ 3614 if (nis) { 3615 PetscMPIInt psum; 3616 PetscInt j; 3617 for (j=0,psum=0;j<nis;j++) { 3618 PetscInt plen; 3619 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3620 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3621 psum += len+1; /* indices + lenght */ 3622 } 3623 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3624 for (j=0,psum=0;j<nis;j++) { 3625 PetscInt plen; 3626 const PetscInt *is_array_idxs; 3627 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3628 send_buffer_idxs_is[psum] = plen; 3629 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3630 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3631 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3632 psum += plen+1; /* indices + lenght */ 3633 } 3634 for (i=0;i<n_sends;i++) { 3635 ilengths_idxs_is[is_indices[i]] = psum; 3636 } 3637 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3638 } 3639 3640 buf_size_idxs = 0; 3641 buf_size_vals = 0; 3642 buf_size_idxs_is = 0; 3643 for (i=0;i<n_recvs;i++) { 3644 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3645 buf_size_vals += (PetscInt)olengths_vals[i]; 3646 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3647 } 3648 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3649 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3650 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3651 3652 /* get new tags for clean communications */ 3653 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3654 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3655 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3656 3657 /* allocate for requests */ 3658 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3659 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3660 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3661 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3662 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3663 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3664 3665 /* communications */ 3666 ptr_idxs = recv_buffer_idxs; 3667 ptr_vals = recv_buffer_vals; 3668 ptr_idxs_is = recv_buffer_idxs_is; 3669 for (i=0;i<n_recvs;i++) { 3670 source_dest = onodes[i]; 3671 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3672 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3673 ptr_idxs += olengths_idxs[i]; 3674 ptr_vals += olengths_vals[i]; 3675 if (nis) { 3676 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3677 ptr_idxs_is += olengths_idxs_is[i]; 3678 } 3679 } 3680 for (i=0;i<n_sends;i++) { 3681 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3682 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3683 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3684 if (nis) { 3685 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3686 } 3687 } 3688 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3689 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3690 3691 /* assemble new l2g map */ 3692 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3693 ptr_idxs = recv_buffer_idxs; 3694 new_local_rows = 0; 3695 for (i=0;i<n_recvs;i++) { 3696 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3697 ptr_idxs += olengths_idxs[i]; 3698 } 3699 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3700 ptr_idxs = recv_buffer_idxs; 3701 new_local_rows = 0; 3702 for (i=0;i<n_recvs;i++) { 3703 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3704 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3705 ptr_idxs += olengths_idxs[i]; 3706 } 3707 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3708 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3709 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3710 3711 /* infer new local matrix type from received local matrices type */ 3712 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3713 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3714 if (n_recvs) { 3715 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3716 ptr_idxs = recv_buffer_idxs; 3717 for (i=0;i<n_recvs;i++) { 3718 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3719 new_local_type_private = MATAIJ_PRIVATE; 3720 break; 3721 } 3722 ptr_idxs += olengths_idxs[i]; 3723 } 3724 switch (new_local_type_private) { 3725 case MATDENSE_PRIVATE: 3726 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3727 new_local_type = MATSEQAIJ; 3728 bs = 1; 3729 } else { /* if I receive only 1 dense matrix */ 3730 new_local_type = MATSEQDENSE; 3731 bs = 1; 3732 } 3733 break; 3734 case MATAIJ_PRIVATE: 3735 new_local_type = MATSEQAIJ; 3736 bs = 1; 3737 break; 3738 case MATBAIJ_PRIVATE: 3739 new_local_type = MATSEQBAIJ; 3740 break; 3741 case MATSBAIJ_PRIVATE: 3742 new_local_type = MATSEQSBAIJ; 3743 break; 3744 default: 3745 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3746 break; 3747 } 3748 } else { /* by default, new_local_type is seqdense */ 3749 new_local_type = MATSEQDENSE; 3750 bs = 1; 3751 } 3752 3753 /* create MATIS object if needed */ 3754 if (reuse == MAT_INITIAL_MATRIX) { 3755 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3756 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3757 } else { 3758 /* it also destroys the local matrices */ 3759 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3760 } 3761 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3762 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3763 3764 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3765 3766 /* Global to local map of received indices */ 3767 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3768 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3769 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3770 3771 /* restore attributes -> type of incoming data and its size */ 3772 buf_size_idxs = 0; 3773 for (i=0;i<n_recvs;i++) { 3774 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3775 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3776 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3777 } 3778 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3779 3780 /* set preallocation */ 3781 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3782 if (!newisdense) { 3783 PetscInt *new_local_nnz=0; 3784 3785 ptr_vals = recv_buffer_vals; 3786 ptr_idxs = recv_buffer_idxs_local; 3787 if (n_recvs) { 3788 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3789 } 3790 for (i=0;i<n_recvs;i++) { 3791 PetscInt j; 3792 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3793 for (j=0;j<*(ptr_idxs+1);j++) { 3794 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3795 } 3796 } else { 3797 /* TODO */ 3798 } 3799 ptr_idxs += olengths_idxs[i]; 3800 } 3801 if (new_local_nnz) { 3802 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3803 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3804 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3805 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3806 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3807 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3808 } else { 3809 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3810 } 3811 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3812 } else { 3813 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3814 } 3815 3816 /* set values */ 3817 ptr_vals = recv_buffer_vals; 3818 ptr_idxs = recv_buffer_idxs_local; 3819 for (i=0;i<n_recvs;i++) { 3820 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3821 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3822 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3823 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3824 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3825 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3826 } else { 3827 /* TODO */ 3828 } 3829 ptr_idxs += olengths_idxs[i]; 3830 ptr_vals += olengths_vals[i]; 3831 } 3832 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3833 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3834 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3835 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3836 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3837 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3838 3839 #if 0 3840 if (!restrict_comm) { /* check */ 3841 Vec lvec,rvec; 3842 PetscReal infty_error; 3843 3844 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3845 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3846 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3847 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3848 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3849 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3850 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3851 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3852 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3853 } 3854 #endif 3855 3856 /* assemble new additional is (if any) */ 3857 if (nis) { 3858 PetscInt **temp_idxs,*count_is,j,psum; 3859 3860 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3861 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3862 ptr_idxs = recv_buffer_idxs_is; 3863 psum = 0; 3864 for (i=0;i<n_recvs;i++) { 3865 for (j=0;j<nis;j++) { 3866 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3867 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3868 psum += plen; 3869 ptr_idxs += plen+1; /* shift pointer to received data */ 3870 } 3871 } 3872 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3873 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3874 for (i=1;i<nis;i++) { 3875 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3876 } 3877 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3878 ptr_idxs = recv_buffer_idxs_is; 3879 for (i=0;i<n_recvs;i++) { 3880 for (j=0;j<nis;j++) { 3881 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3882 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3883 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3884 ptr_idxs += plen+1; /* shift pointer to received data */ 3885 } 3886 } 3887 for (i=0;i<nis;i++) { 3888 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3889 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3890 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3891 } 3892 ierr = PetscFree(count_is);CHKERRQ(ierr); 3893 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3894 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3895 } 3896 /* free workspace */ 3897 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3898 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3899 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3900 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3901 if (isdense) { 3902 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3903 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3904 } else { 3905 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3906 } 3907 if (nis) { 3908 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3909 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3910 } 3911 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3912 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3913 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3914 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3915 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3916 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3917 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3918 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3919 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3920 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3921 ierr = PetscFree(onodes);CHKERRQ(ierr); 3922 if (nis) { 3923 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3924 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3925 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3926 } 3927 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3928 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3929 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3930 for (i=0;i<nis;i++) { 3931 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3932 } 3933 *mat_n = NULL; 3934 } 3935 PetscFunctionReturn(0); 3936 } 3937 3938 /* temporary hack into ksp private data structure */ 3939 #include <petsc/private/kspimpl.h> 3940 3941 #undef __FUNCT__ 3942 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3943 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3944 { 3945 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3946 PC_IS *pcis = (PC_IS*)pc->data; 3947 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3948 MatNullSpace CoarseNullSpace=NULL; 3949 ISLocalToGlobalMapping coarse_islg; 3950 IS coarse_is,*isarray; 3951 PetscInt i,im_active=-1,active_procs=-1; 3952 PetscInt nis,nisdofs,nisneu; 3953 PC pc_temp; 3954 PCType coarse_pc_type; 3955 KSPType coarse_ksp_type; 3956 PetscBool multilevel_requested,multilevel_allowed; 3957 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3958 Mat t_coarse_mat_is; 3959 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3960 PetscMPIInt all_procs; 3961 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3962 PetscBool compute_vecs = PETSC_FALSE; 3963 PetscScalar *array; 3964 PetscErrorCode ierr; 3965 3966 PetscFunctionBegin; 3967 /* Assign global numbering to coarse dofs */ 3968 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3969 PetscInt ocoarse_size; 3970 compute_vecs = PETSC_TRUE; 3971 ocoarse_size = pcbddc->coarse_size; 3972 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3973 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3974 /* see if we can avoid some work */ 3975 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3976 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3977 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3978 coarse_reuse = PETSC_FALSE; 3979 } else { /* we can safely reuse already computed coarse matrix */ 3980 coarse_reuse = PETSC_TRUE; 3981 } 3982 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3983 coarse_reuse = PETSC_FALSE; 3984 } 3985 /* reset any subassembling information */ 3986 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3987 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3988 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3989 coarse_reuse = PETSC_TRUE; 3990 } 3991 3992 /* count "active" (i.e. with positive local size) and "void" processes */ 3993 im_active = !!(pcis->n); 3994 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3995 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3996 void_procs = all_procs-active_procs; 3997 csin_type_simple = PETSC_TRUE; 3998 redist = PETSC_FALSE; 3999 if (pcbddc->current_level && void_procs) { 4000 csin_ml = PETSC_TRUE; 4001 ncoarse_ml = void_procs; 4002 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4003 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4004 csin_ds = PETSC_TRUE; 4005 ncoarse_ds = pcbddc->redistribute_coarse; 4006 redist = PETSC_TRUE; 4007 } else { 4008 csin_ds = PETSC_TRUE; 4009 ncoarse_ds = active_procs; 4010 redist = PETSC_TRUE; 4011 } 4012 } else { 4013 csin_ml = PETSC_FALSE; 4014 ncoarse_ml = all_procs; 4015 if (void_procs) { 4016 csin_ds = PETSC_TRUE; 4017 ncoarse_ds = void_procs; 4018 csin_type_simple = PETSC_FALSE; 4019 } else { 4020 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4021 csin_ds = PETSC_TRUE; 4022 ncoarse_ds = pcbddc->redistribute_coarse; 4023 redist = PETSC_TRUE; 4024 } else { 4025 csin_ds = PETSC_FALSE; 4026 ncoarse_ds = all_procs; 4027 } 4028 } 4029 } 4030 4031 /* 4032 test if we can go multilevel: three conditions must be satisfied: 4033 - we have not exceeded the number of levels requested 4034 - we can actually subassemble the active processes 4035 - we can find a suitable number of MPI processes where we can place the subassembled problem 4036 */ 4037 multilevel_allowed = PETSC_FALSE; 4038 multilevel_requested = PETSC_FALSE; 4039 if (pcbddc->current_level < pcbddc->max_levels) { 4040 multilevel_requested = PETSC_TRUE; 4041 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4042 multilevel_allowed = PETSC_FALSE; 4043 } else { 4044 multilevel_allowed = PETSC_TRUE; 4045 } 4046 } 4047 /* determine number of process partecipating to coarse solver */ 4048 if (multilevel_allowed) { 4049 ncoarse = ncoarse_ml; 4050 csin = csin_ml; 4051 redist = PETSC_FALSE; 4052 } else { 4053 ncoarse = ncoarse_ds; 4054 csin = csin_ds; 4055 } 4056 4057 /* creates temporary l2gmap and IS for coarse indexes */ 4058 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4059 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4060 4061 /* creates temporary MATIS object for coarse matrix */ 4062 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4063 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4064 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4065 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4066 #if 0 4067 { 4068 PetscViewer viewer; 4069 char filename[256]; 4070 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4071 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4072 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4073 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4074 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4075 } 4076 #endif 4077 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4078 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4079 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4080 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4081 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4082 4083 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4084 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4085 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4086 const PetscInt *idxs; 4087 ISLocalToGlobalMapping tmap; 4088 4089 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4090 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4091 /* allocate space for temporary storage */ 4092 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4093 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4094 /* allocate for IS array */ 4095 nisdofs = pcbddc->n_ISForDofsLocal; 4096 nisneu = !!pcbddc->NeumannBoundariesLocal; 4097 nis = nisdofs + nisneu; 4098 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4099 /* dofs splitting */ 4100 for (i=0;i<nisdofs;i++) { 4101 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4102 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4103 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4104 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4105 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4106 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4107 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4108 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4109 } 4110 /* neumann boundaries */ 4111 if (pcbddc->NeumannBoundariesLocal) { 4112 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4113 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4114 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4115 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4116 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4117 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4118 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4119 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4120 } 4121 /* free memory */ 4122 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4123 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4124 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4125 } else { 4126 nis = 0; 4127 nisdofs = 0; 4128 nisneu = 0; 4129 isarray = NULL; 4130 } 4131 /* destroy no longer needed map */ 4132 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4133 4134 /* restrict on coarse candidates (if needed) */ 4135 coarse_mat_is = NULL; 4136 if (csin) { 4137 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4138 if (redist) { 4139 PetscMPIInt rank; 4140 PetscInt spc,n_spc_p1,dest[1],destsize; 4141 4142 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4143 spc = active_procs/ncoarse; 4144 n_spc_p1 = active_procs%ncoarse; 4145 if (im_active) { 4146 destsize = 1; 4147 if (rank > n_spc_p1*(spc+1)-1) { 4148 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4149 } else { 4150 dest[0] = rank/(spc+1); 4151 } 4152 } else { 4153 destsize = 0; 4154 } 4155 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4156 } else if (csin_type_simple) { 4157 PetscMPIInt rank; 4158 PetscInt issize,isidx; 4159 4160 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4161 if (im_active) { 4162 issize = 1; 4163 isidx = (PetscInt)rank; 4164 } else { 4165 issize = 0; 4166 isidx = -1; 4167 } 4168 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4169 } else { /* get a suitable subassembling pattern from MATIS code */ 4170 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4171 } 4172 4173 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4174 if (!redist || ncoarse <= void_procs) { 4175 PetscInt ncoarse_cand,tissize,*nisindices; 4176 PetscInt *coarse_candidates; 4177 const PetscInt* tisindices; 4178 4179 /* get coarse candidates' ranks in pc communicator */ 4180 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4181 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4182 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4183 if (!coarse_candidates[i]) { 4184 coarse_candidates[ncoarse_cand++]=i; 4185 } 4186 } 4187 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4188 4189 4190 if (pcbddc->dbg_flag) { 4191 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4192 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4193 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4194 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4195 for (i=0;i<ncoarse_cand;i++) { 4196 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4197 } 4198 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4199 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4200 } 4201 /* shift the pattern on coarse candidates */ 4202 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4203 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4204 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4205 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4206 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4207 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4208 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4209 } 4210 if (pcbddc->dbg_flag) { 4211 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4212 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4213 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4214 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4215 } 4216 } 4217 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4218 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4219 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4220 } else { /* this is the last level, so use just receiving processes in subcomm */ 4221 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4222 } 4223 } else { 4224 if (pcbddc->dbg_flag) { 4225 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4226 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4227 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4228 } 4229 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4230 coarse_mat_is = t_coarse_mat_is; 4231 } 4232 4233 /* create local to global scatters for coarse problem */ 4234 if (compute_vecs) { 4235 PetscInt lrows; 4236 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4237 if (coarse_mat_is) { 4238 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4239 } else { 4240 lrows = 0; 4241 } 4242 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4243 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4244 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4245 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4246 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4247 } 4248 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4249 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4250 4251 /* set defaults for coarse KSP and PC */ 4252 if (multilevel_allowed) { 4253 coarse_ksp_type = KSPRICHARDSON; 4254 coarse_pc_type = PCBDDC; 4255 } else { 4256 coarse_ksp_type = KSPPREONLY; 4257 coarse_pc_type = PCREDUNDANT; 4258 } 4259 4260 /* print some info if requested */ 4261 if (pcbddc->dbg_flag) { 4262 if (!multilevel_allowed) { 4263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4264 if (multilevel_requested) { 4265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4266 } else if (pcbddc->max_levels) { 4267 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4268 } 4269 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4270 } 4271 } 4272 4273 /* create the coarse KSP object only once with defaults */ 4274 if (coarse_mat_is) { 4275 MatReuse coarse_mat_reuse; 4276 PetscViewer dbg_viewer = NULL; 4277 if (pcbddc->dbg_flag) { 4278 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4279 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4280 } 4281 if (!pcbddc->coarse_ksp) { 4282 char prefix[256],str_level[16]; 4283 size_t len; 4284 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4285 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4286 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4287 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4288 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4289 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4290 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4291 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4292 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4293 /* prefix */ 4294 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4295 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4296 if (!pcbddc->current_level) { 4297 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4298 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4299 } else { 4300 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4301 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4302 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4303 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4304 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4305 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4306 } 4307 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4308 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4309 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4310 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4311 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4312 /* allow user customization */ 4313 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4314 } 4315 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4316 if (nisdofs) { 4317 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4318 for (i=0;i<nisdofs;i++) { 4319 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4320 } 4321 } 4322 if (nisneu) { 4323 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4324 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4325 } 4326 4327 /* get some info after set from options */ 4328 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4329 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4330 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4331 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4332 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4333 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4334 isbddc = PETSC_FALSE; 4335 } 4336 if (isredundant) { 4337 KSP inner_ksp; 4338 PC inner_pc; 4339 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4340 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4341 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4342 } 4343 4344 /* assemble coarse matrix */ 4345 if (coarse_reuse) { 4346 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4347 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4348 coarse_mat_reuse = MAT_REUSE_MATRIX; 4349 } else { 4350 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4351 } 4352 if (isbddc || isnn) { 4353 if (pcbddc->coarsening_ratio > 1) { 4354 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4355 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4356 if (pcbddc->dbg_flag) { 4357 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4358 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4359 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4360 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4361 } 4362 } 4363 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4364 } else { 4365 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4366 coarse_mat = coarse_mat_is; 4367 } 4368 } else { 4369 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4370 } 4371 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4372 4373 /* propagate symmetry info of coarse matrix */ 4374 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4375 if (pc->pmat->symmetric_set) { 4376 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4377 } 4378 if (pc->pmat->hermitian_set) { 4379 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4380 } 4381 if (pc->pmat->spd_set) { 4382 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4383 } 4384 /* set operators */ 4385 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4386 if (pcbddc->dbg_flag) { 4387 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4388 } 4389 } else { /* processes non partecipating to coarse solver (if any) */ 4390 coarse_mat = 0; 4391 } 4392 ierr = PetscFree(isarray);CHKERRQ(ierr); 4393 #if 0 4394 { 4395 PetscViewer viewer; 4396 char filename[256]; 4397 sprintf(filename,"coarse_mat.m"); 4398 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4399 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4400 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4401 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4402 } 4403 #endif 4404 4405 /* Compute coarse null space (special handling by BDDC only) */ 4406 if (pcbddc->NullSpace) { 4407 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4408 } 4409 4410 if (pcbddc->coarse_ksp) { 4411 Vec crhs,csol; 4412 PetscBool ispreonly; 4413 4414 if (CoarseNullSpace) { 4415 if (isbddc) { 4416 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4417 } else { 4418 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4419 } 4420 } 4421 /* setup coarse ksp */ 4422 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4423 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4424 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4425 /* hack */ 4426 if (!csol) { 4427 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4428 } 4429 if (!crhs) { 4430 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4431 } 4432 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4433 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4434 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4435 KSP check_ksp; 4436 KSPType check_ksp_type; 4437 PC check_pc; 4438 Vec check_vec,coarse_vec; 4439 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4440 PetscInt its; 4441 PetscBool compute_eigs; 4442 PetscReal *eigs_r,*eigs_c; 4443 PetscInt neigs; 4444 const char *prefix; 4445 4446 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4447 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4448 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4449 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4450 if (ispreonly) { 4451 check_ksp_type = KSPPREONLY; 4452 compute_eigs = PETSC_FALSE; 4453 } else { 4454 check_ksp_type = KSPGMRES; 4455 compute_eigs = PETSC_TRUE; 4456 } 4457 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4458 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4459 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4460 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4461 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4462 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4463 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4464 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4465 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4466 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4467 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4468 /* create random vec */ 4469 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4470 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4471 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4472 if (CoarseNullSpace) { 4473 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4474 } 4475 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4476 /* solve coarse problem */ 4477 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4478 if (CoarseNullSpace) { 4479 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4480 } 4481 /* set eigenvalue estimation if preonly has not been requested */ 4482 if (compute_eigs) { 4483 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4484 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4485 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4486 lambda_max = eigs_r[neigs-1]; 4487 lambda_min = eigs_r[0]; 4488 if (pcbddc->use_coarse_estimates) { 4489 if (lambda_max>lambda_min) { 4490 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4491 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4492 } 4493 } 4494 } 4495 4496 /* check coarse problem residual error */ 4497 if (pcbddc->dbg_flag) { 4498 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4499 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4500 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4501 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4502 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4503 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4504 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4505 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4506 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4507 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4508 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4509 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4510 if (compute_eigs) { 4511 PetscReal lambda_max_s,lambda_min_s; 4512 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4513 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4514 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4515 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4516 for (i=0;i<neigs;i++) { 4517 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4518 } 4519 } 4520 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4521 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4522 } 4523 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4524 if (compute_eigs) { 4525 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4526 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4527 } 4528 } 4529 } 4530 /* print additional info */ 4531 if (pcbddc->dbg_flag) { 4532 /* waits until all processes reaches this point */ 4533 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4534 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4535 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4536 } 4537 4538 /* free memory */ 4539 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4540 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4541 PetscFunctionReturn(0); 4542 } 4543 4544 #undef __FUNCT__ 4545 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4546 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4547 { 4548 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4549 PC_IS* pcis = (PC_IS*)pc->data; 4550 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4551 IS subset,subset_mult,subset_n; 4552 PetscInt local_size,coarse_size=0; 4553 PetscInt *local_primal_indices=NULL; 4554 const PetscInt *t_local_primal_indices; 4555 PetscErrorCode ierr; 4556 4557 PetscFunctionBegin; 4558 /* Compute global number of coarse dofs */ 4559 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4560 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4561 } 4562 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4563 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,subset_n,&subset);CHKERRQ(ierr); 4564 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4565 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4566 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4567 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4568 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4569 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4570 if (local_size != pcbddc->local_primal_size) { 4571 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4572 } 4573 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4574 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4575 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4576 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4577 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4578 4579 /* check numbering */ 4580 if (pcbddc->dbg_flag) { 4581 PetscScalar coarsesum,*array; 4582 PetscInt i; 4583 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4584 4585 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4586 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4587 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4588 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4589 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4590 for (i=0;i<pcbddc->local_primal_size;i++) { 4591 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4592 } 4593 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4594 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4595 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4596 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4597 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4598 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4599 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4600 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4601 for (i=0;i<pcis->n;i++) { 4602 if (array[i] == 1.0) { 4603 set_error = PETSC_TRUE; 4604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4605 } 4606 } 4607 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4608 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4609 for (i=0;i<pcis->n;i++) { 4610 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4611 } 4612 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4613 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4614 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4615 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4616 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4617 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4618 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4619 PetscInt *gidxs; 4620 4621 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4622 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4623 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4624 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4626 for (i=0;i<pcbddc->local_primal_size;i++) { 4627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4628 } 4629 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4630 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4631 } 4632 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4633 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4634 } 4635 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4636 /* get back data */ 4637 *coarse_size_n = coarse_size; 4638 *local_primal_indices_n = local_primal_indices; 4639 PetscFunctionReturn(0); 4640 } 4641 4642 #undef __FUNCT__ 4643 #define __FUNCT__ "PCBDDCGlobalToLocal" 4644 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4645 { 4646 IS localis_t; 4647 PetscInt i,lsize,*idxs,n; 4648 PetscScalar *vals; 4649 PetscErrorCode ierr; 4650 4651 PetscFunctionBegin; 4652 /* get indices in local ordering exploiting local to global map */ 4653 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4654 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4655 for (i=0;i<lsize;i++) vals[i] = 1.0; 4656 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4657 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4658 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4659 if (idxs) { /* multilevel guard */ 4660 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4661 } 4662 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4663 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4664 ierr = PetscFree(vals);CHKERRQ(ierr); 4665 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4666 /* now compute set in local ordering */ 4667 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4668 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4669 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4670 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4671 for (i=0,lsize=0;i<n;i++) { 4672 if (PetscRealPart(vals[i]) > 0.5) { 4673 lsize++; 4674 } 4675 } 4676 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4677 for (i=0,lsize=0;i<n;i++) { 4678 if (PetscRealPart(vals[i]) > 0.5) { 4679 idxs[lsize++] = i; 4680 } 4681 } 4682 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4683 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4684 *localis = localis_t; 4685 PetscFunctionReturn(0); 4686 } 4687 4688 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4689 #undef __FUNCT__ 4690 #define __FUNCT__ "PCBDDCMatMult_Private" 4691 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4692 { 4693 PCBDDCChange_ctx change_ctx; 4694 PetscErrorCode ierr; 4695 4696 PetscFunctionBegin; 4697 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4698 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4699 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4700 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4701 PetscFunctionReturn(0); 4702 } 4703 4704 #undef __FUNCT__ 4705 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4706 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4707 { 4708 PCBDDCChange_ctx change_ctx; 4709 PetscErrorCode ierr; 4710 4711 PetscFunctionBegin; 4712 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4713 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4714 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4715 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4716 PetscFunctionReturn(0); 4717 } 4718 4719 #undef __FUNCT__ 4720 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4721 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4722 { 4723 PC_IS *pcis=(PC_IS*)pc->data; 4724 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4725 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4726 Mat S_j; 4727 PetscInt *used_xadj,*used_adjncy; 4728 PetscBool free_used_adj; 4729 PetscErrorCode ierr; 4730 4731 PetscFunctionBegin; 4732 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4733 free_used_adj = PETSC_FALSE; 4734 if (pcbddc->sub_schurs_layers == -1) { 4735 used_xadj = NULL; 4736 used_adjncy = NULL; 4737 } else { 4738 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4739 used_xadj = pcbddc->mat_graph->xadj; 4740 used_adjncy = pcbddc->mat_graph->adjncy; 4741 } else if (pcbddc->computed_rowadj) { 4742 used_xadj = pcbddc->mat_graph->xadj; 4743 used_adjncy = pcbddc->mat_graph->adjncy; 4744 } else { 4745 PetscBool flg_row=PETSC_FALSE; 4746 const PetscInt *xadj,*adjncy; 4747 PetscInt nvtxs; 4748 4749 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4750 if (flg_row) { 4751 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4752 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4753 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4754 free_used_adj = PETSC_TRUE; 4755 } else { 4756 pcbddc->sub_schurs_layers = -1; 4757 used_xadj = NULL; 4758 used_adjncy = NULL; 4759 } 4760 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4761 } 4762 } 4763 4764 /* setup sub_schurs data */ 4765 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4766 if (!sub_schurs->use_mumps) { 4767 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4768 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4769 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4770 } else { 4771 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4772 PetscBool isseqaij; 4773 if (!pcbddc->use_vertices && reuse_solvers) { 4774 PetscInt n_vertices; 4775 4776 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4777 reuse_solvers = (PetscBool)!n_vertices; 4778 } 4779 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4780 if (!isseqaij) { 4781 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4782 if (matis->A == pcbddc->local_mat) { 4783 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4784 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4785 } else { 4786 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4787 } 4788 } 4789 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4790 } 4791 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4792 4793 /* free adjacency */ 4794 if (free_used_adj) { 4795 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4796 } 4797 PetscFunctionReturn(0); 4798 } 4799 4800 #undef __FUNCT__ 4801 #define __FUNCT__ "PCBDDCInitSubSchurs" 4802 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4803 { 4804 PC_IS *pcis=(PC_IS*)pc->data; 4805 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4806 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4807 PCBDDCGraph graph; 4808 PetscErrorCode ierr; 4809 4810 PetscFunctionBegin; 4811 /* attach interface graph for determining subsets */ 4812 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4813 IS verticesIS,verticescomm; 4814 PetscInt vsize,*idxs; 4815 4816 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4817 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4818 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4819 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4820 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4821 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4822 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4823 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4824 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4825 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4826 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4827 /* 4828 if (pcbddc->dbg_flag) { 4829 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4830 } 4831 */ 4832 } else { 4833 graph = pcbddc->mat_graph; 4834 } 4835 4836 /* sub_schurs init */ 4837 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4838 4839 /* free graph struct */ 4840 if (pcbddc->sub_schurs_rebuild) { 4841 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4842 } 4843 PetscFunctionReturn(0); 4844 } 4845