1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 thresh = 1.0; 92 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 93 #if defined(PETSC_USE_COMPLEX) 94 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 95 #else 96 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 97 #endif 98 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 99 ierr = PetscFPTrapPop();CHKERRQ(ierr); 100 } else { 101 /* TODO */ 102 } 103 } else { 104 lwork = 0; 105 } 106 107 nv = 0; 108 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 109 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 110 } 111 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 112 if (allocated_S_St) { 113 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 114 } 115 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 116 #if defined(PETSC_USE_COMPLEX) 117 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 118 #endif 119 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 121 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 122 nv+cum,&pcbddc->adaptive_constraints_idxs, 123 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 124 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 125 126 maxneigs = 0; 127 cum = cum2 = cumarray = 0; 128 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 129 pcbddc->adaptive_constraints_data_ptr[0] = 0; 130 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 131 const PetscInt *idxs; 132 133 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 134 for (cum=0;cum<nv;cum++) { 135 pcbddc->adaptive_constraints_n[cum] = 1; 136 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 137 pcbddc->adaptive_constraints_data[cum] = 1.0; 138 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 139 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 140 } 141 cum2 = cum; 142 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 143 } 144 145 if (mss) { /* multilevel */ 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 147 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 148 } 149 150 for (i=0;i<sub_schurs->n_subs;i++) { 151 152 const PetscInt *idxs; 153 PetscReal infty = PETSC_MAX_REAL; 154 PetscInt j,subset_size,eigs_start = 0; 155 PetscBLASInt B_N; 156 PetscBool same_data = PETSC_FALSE; 157 158 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 159 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 160 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 161 if (sub_schurs->is_hermitian) { 162 PetscInt j,k; 163 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 164 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 for (j=0;j<subset_size;j++) { 168 for (k=j;k<subset_size;k++) { 169 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 170 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 171 } 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 183 /* see if we can save some work */ 184 if (sub_schurs->n_subs == 1) { 185 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 186 } 187 188 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 189 B_neigs = 0; 190 } else { 191 /* Threshold: this is an heuristic for edges */ 192 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 193 194 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 195 PetscBLASInt B_itype = 1; 196 PetscBLASInt B_IL, B_IU; 197 PetscReal eps = -1.0; /* dlamch? */ 198 PetscInt nmin_s; 199 200 /* ask for eigenvalues larger than thresh */ 201 if (pcbddc->dbg_flag) { 202 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 203 } 204 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 205 #if defined(PETSC_USE_COMPLEX) 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 207 #else 208 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 209 #endif 210 ierr = PetscFPTrapPop();CHKERRQ(ierr); 211 if (B_ierr) { 212 if (B_ierr < 0 ) { 213 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 214 } else if (B_ierr <= B_N) { 215 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 216 } else { 217 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 218 } 219 } 220 221 if (B_neigs > nmax) { 222 if (pcbddc->dbg_flag) { 223 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 224 } 225 eigs_start = B_neigs -nmax; 226 B_neigs = nmax; 227 } 228 229 nmin_s = PetscMin(nmin,B_N); 230 if (B_neigs < nmin_s) { 231 PetscBLASInt B_neigs2; 232 233 B_IU = B_N - B_neigs; 234 B_IL = B_N - nmin_s + 1; 235 if (pcbddc->dbg_flag) { 236 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 237 } 238 if (sub_schurs->is_hermitian) { 239 PetscInt j; 240 for (j=0;j<subset_size;j++) { 241 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 242 } 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 } else { 247 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 248 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 249 } 250 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 251 #if defined(PETSC_USE_COMPLEX) 252 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 253 #else 254 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 255 #endif 256 ierr = PetscFPTrapPop();CHKERRQ(ierr); 257 B_neigs += B_neigs2; 258 } 259 if (B_ierr) { 260 if (B_ierr < 0 ) { 261 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 262 } else if (B_ierr <= B_N) { 263 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 264 } else { 265 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 266 } 267 } 268 if (pcbddc->dbg_flag) { 269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 270 for (j=0;j<B_neigs;j++) { 271 if (eigs[j] == 0.0) { 272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 273 } else { 274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 275 } 276 } 277 } 278 } else { 279 /* TODO */ 280 } 281 } 282 maxneigs = PetscMax(B_neigs,maxneigs); 283 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 284 if (B_neigs) { 285 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 286 287 if (pcbddc->dbg_flag > 1) { 288 PetscInt ii; 289 for (ii=0;ii<B_neigs;ii++) { 290 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 291 for (j=0;j<B_N;j++) { 292 #if defined(PETSC_USE_COMPLEX) 293 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 294 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 295 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 296 #else 297 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 298 #endif 299 } 300 } 301 } 302 #if 0 303 for (j=0;j<B_neigs;j++) { 304 PetscBLASInt Blas_N,Blas_one = 1.0; 305 PetscScalar norm; 306 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 307 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 308 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 309 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 310 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 311 } else { 312 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 313 } 314 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 315 } 316 #endif 317 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 318 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 319 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 320 cum++; 321 } 322 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 323 /* shift for next computation */ 324 cumarray += subset_size*subset_size; 325 } 326 if (pcbddc->dbg_flag) { 327 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 328 } 329 330 if (mss) { 331 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 332 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 333 /* destroy matrices (junk) */ 334 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 335 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 336 } 337 if (allocated_S_St) { 338 ierr = PetscFree2(S,St);CHKERRQ(ierr); 339 } 340 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 341 #if defined(PETSC_USE_COMPLEX) 342 ierr = PetscFree(rwork);CHKERRQ(ierr); 343 #endif 344 if (pcbddc->dbg_flag) { 345 PetscInt maxneigs_r; 346 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 347 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 348 } 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "PCBDDCSetUpSolvers" 354 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 355 { 356 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 357 PetscScalar *coarse_submat_vals; 358 PetscErrorCode ierr; 359 360 PetscFunctionBegin; 361 /* Setup local scatters R_to_B and (optionally) R_to_D */ 362 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 363 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 364 365 /* Setup local neumann solver ksp_R */ 366 /* PCBDDCSetUpLocalScatters should be called first! */ 367 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 368 369 /* Change global null space passed in by the user if change of basis has been requested */ 370 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 371 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 372 } 373 374 /* 375 Setup local correction and local part of coarse basis. 376 Gives back the dense local part of the coarse matrix in column major ordering 377 */ 378 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 379 380 /* Compute total number of coarse nodes and setup coarse solver */ 381 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 382 383 /* free */ 384 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCResetCustomization" 390 PetscErrorCode PCBDDCResetCustomization(PC pc) 391 { 392 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 397 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 398 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 399 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 401 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 402 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 403 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 405 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 406 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "PCBDDCResetTopography" 412 PetscErrorCode PCBDDCResetTopography(PC pc) 413 { 414 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 419 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 421 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 422 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "PCBDDCResetSolvers" 428 PetscErrorCode PCBDDCResetSolvers(PC pc) 429 { 430 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 431 PetscScalar *array; 432 PetscErrorCode ierr; 433 434 PetscFunctionBegin; 435 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 436 if (pcbddc->coarse_phi_B) { 437 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 438 ierr = PetscFree(array);CHKERRQ(ierr); 439 } 440 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 442 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 443 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 444 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 445 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 447 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 449 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 450 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 451 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 452 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 453 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 454 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 455 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 456 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 457 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 458 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 459 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 460 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 461 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 462 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 468 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 469 { 470 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 471 PC_IS *pcis = (PC_IS*)pc->data; 472 VecType impVecType; 473 PetscInt n_constraints,n_R,old_size; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 if (!pcbddc->ConstraintMatrix) { 478 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 479 } 480 /* get sizes */ 481 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 482 n_R = pcis->n-pcbddc->n_vertices; 483 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 484 /* local work vectors (try to avoid unneeded work)*/ 485 /* R nodes */ 486 old_size = -1; 487 if (pcbddc->vec1_R) { 488 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 489 } 490 if (n_R != old_size) { 491 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 492 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 493 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 494 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 495 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 496 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 497 } 498 /* local primal dofs */ 499 old_size = -1; 500 if (pcbddc->vec1_P) { 501 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 502 } 503 if (pcbddc->local_primal_size != old_size) { 504 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 505 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 506 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 507 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 508 } 509 /* local explicit constraints */ 510 old_size = -1; 511 if (pcbddc->vec1_C) { 512 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 513 } 514 if (n_constraints && n_constraints != old_size) { 515 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 516 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 517 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 518 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 519 } 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "PCBDDCSetUpCorrection" 525 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 526 { 527 PetscErrorCode ierr; 528 /* pointers to pcis and pcbddc */ 529 PC_IS* pcis = (PC_IS*)pc->data; 530 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 531 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 532 /* submatrices of local problem */ 533 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 534 /* submatrices of local coarse problem */ 535 Mat S_VV,S_CV,S_VC,S_CC; 536 /* working matrices */ 537 Mat C_CR; 538 /* additional working stuff */ 539 PC pc_R; 540 Mat F; 541 PetscBool isLU,isCHOL,isILU; 542 543 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 544 PetscScalar *work; 545 PetscInt *idx_V_B; 546 PetscInt n,n_vertices,n_constraints; 547 PetscInt i,n_R,n_D,n_B; 548 PetscBool unsymmetric_check; 549 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 550 MatType impMatType; 551 /* some shortcuts to scalars */ 552 PetscScalar one=1.0,m_one=-1.0; 553 554 PetscFunctionBegin; 555 n_vertices = pcbddc->n_vertices; 556 n_constraints = pcbddc->local_primal_size-n_vertices; 557 /* Set Non-overlapping dimensions */ 558 n_B = pcis->n_B; 559 n_D = pcis->n - n_B; 560 n_R = pcis->n - n_vertices; 561 562 /* Set types for local objects needed by BDDC precondtioner */ 563 impMatType = MATSEQDENSE; 564 565 /* vertices in boundary numbering */ 566 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 567 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 568 if (i != n_vertices) { 569 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 570 } 571 572 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 573 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 575 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 576 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 577 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 578 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 579 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 581 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 582 583 unsymmetric_check = PETSC_FALSE; 584 /* allocate workspace */ 585 n = 0; 586 if (n_constraints) { 587 n += n_R*n_constraints; 588 } 589 if (n_vertices) { 590 n = PetscMax(2*n_R*n_vertices,n); 591 n = PetscMax((n_R+n_B)*n_vertices,n); 592 } 593 if (!pcbddc->symmetric_primal) { 594 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 595 unsymmetric_check = PETSC_TRUE; 596 } 597 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 598 599 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 600 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 601 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 602 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 603 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 604 if (isLU || isILU || isCHOL) { 605 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 606 } else if (sub_schurs->reuse_mumps) { 607 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 608 MatFactorType type; 609 610 F = reuse_mumps->F; 611 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 612 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 613 } else { 614 F = NULL; 615 } 616 617 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 618 if (n_constraints) { 619 Mat M1,M2,M3; 620 Mat auxmat; 621 IS is_aux; 622 PetscScalar *array,*array2; 623 624 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 625 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 626 627 /* Extract constraints on R nodes: C_{CR} */ 628 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 629 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 630 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 631 632 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 633 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 634 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 635 for (i=0;i<n_constraints;i++) { 636 const PetscScalar *row_cmat_values; 637 const PetscInt *row_cmat_indices; 638 PetscInt size_of_constraint,j; 639 640 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 641 for (j=0;j<size_of_constraint;j++) { 642 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 643 } 644 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 645 } 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 647 if (F) { 648 Mat B; 649 650 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 651 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 652 ierr = MatDestroy(&B);CHKERRQ(ierr); 653 } else { 654 PetscScalar *marr; 655 656 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 660 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 661 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 662 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 663 } 664 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 665 } 666 if (!pcbddc->switch_static) { 667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 668 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 669 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 670 for (i=0;i<n_constraints;i++) { 671 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 672 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 673 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 674 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 675 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 676 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 677 } 678 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 679 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 680 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 681 } else { 682 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 683 pcbddc->local_auxmat2 = local_auxmat2_R; 684 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 685 } 686 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 687 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 688 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 689 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 690 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 691 if (isCHOL) { 692 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 693 } else { 694 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 695 } 696 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 697 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 698 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 699 ierr = MatDestroy(&M2);CHKERRQ(ierr); 700 ierr = MatDestroy(&M3);CHKERRQ(ierr); 701 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 702 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 703 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 704 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 705 ierr = MatDestroy(&M1);CHKERRQ(ierr); 706 } 707 /* Get submatrices from subdomain matrix */ 708 if (n_vertices) { 709 IS is_aux; 710 711 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 712 IS tis; 713 714 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 715 ierr = ISSort(tis);CHKERRQ(ierr); 716 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 717 ierr = ISDestroy(&tis);CHKERRQ(ierr); 718 } else { 719 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 720 } 721 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 722 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 723 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 724 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 725 } 726 727 /* Matrix of coarse basis functions (local) */ 728 if (pcbddc->coarse_phi_B) { 729 PetscInt on_B,on_primal,on_D=n_D; 730 if (pcbddc->coarse_phi_D) { 731 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 732 } 733 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 734 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 735 PetscScalar *marray; 736 737 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 738 ierr = PetscFree(marray);CHKERRQ(ierr); 739 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 740 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 741 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 743 } 744 } 745 746 if (!pcbddc->coarse_phi_B) { 747 PetscScalar *marray; 748 749 n = n_B*pcbddc->local_primal_size; 750 if (pcbddc->switch_static || pcbddc->dbg_flag) { 751 n += n_D*pcbddc->local_primal_size; 752 } 753 if (!pcbddc->symmetric_primal) { 754 n *= 2; 755 } 756 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 757 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 758 n = n_B*pcbddc->local_primal_size; 759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 760 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 761 n += n_D*pcbddc->local_primal_size; 762 } 763 if (!pcbddc->symmetric_primal) { 764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 765 if (pcbddc->switch_static || pcbddc->dbg_flag) { 766 n = n_B*pcbddc->local_primal_size; 767 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 768 } 769 } else { 770 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 771 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 772 if (pcbddc->switch_static || pcbddc->dbg_flag) { 773 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 774 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 775 } 776 } 777 } 778 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 779 /* vertices */ 780 if (n_vertices) { 781 782 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 783 784 if (n_R) { 785 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 786 PetscBLASInt B_N,B_one = 1; 787 PetscScalar *x,*y; 788 PetscBool isseqaij; 789 790 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 791 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 792 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 793 if (F) { /* TODO could be optimized for symmetric problems */ 794 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 795 } else { 796 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 797 for (i=0;i<n_vertices;i++) { 798 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 799 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 800 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 801 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 802 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 803 } 804 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 805 } 806 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 807 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 808 if (n_constraints) { 809 Mat B; 810 811 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 812 for (i=0;i<n_vertices;i++) { 813 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 814 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 815 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 816 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 817 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 818 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 819 } 820 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 821 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 822 ierr = MatDestroy(&B);CHKERRQ(ierr); 823 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 824 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 825 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 826 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 827 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 828 ierr = MatDestroy(&B);CHKERRQ(ierr); 829 } 830 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 831 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 832 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 833 } 834 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 835 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 836 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 837 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 838 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 839 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 840 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 841 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 842 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 843 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 844 } else { 845 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 846 } 847 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 848 /* coarse basis functions */ 849 for (i=0;i<n_vertices;i++) { 850 PetscScalar *y; 851 852 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 853 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 854 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 855 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 857 y[n_B*i+idx_V_B[i]] = 1.0; 858 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 859 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 860 861 if (pcbddc->switch_static || pcbddc->dbg_flag) { 862 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 863 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 864 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 866 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 867 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 868 } 869 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 870 } 871 /* if n_R == 0 the object is not destroyed */ 872 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 873 } 874 875 if (n_constraints) { 876 Mat B; 877 878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 879 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 880 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 881 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 882 if (n_vertices) { 883 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 884 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 885 } else { 886 Mat S_VCt; 887 888 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 889 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 890 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 891 } 892 } 893 ierr = MatDestroy(&B);CHKERRQ(ierr); 894 /* coarse basis functions */ 895 for (i=0;i<n_constraints;i++) { 896 PetscScalar *y; 897 898 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 899 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 900 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 901 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 902 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 904 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 905 if (pcbddc->switch_static || pcbddc->dbg_flag) { 906 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 907 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 908 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 910 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 911 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 912 } 913 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 914 } 915 } 916 if (n_constraints) { 917 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 918 } 919 920 /* compute other basis functions for non-symmetric problems */ 921 if (!pcbddc->symmetric_primal) { 922 923 if (n_constraints) { 924 Mat S_CCT,B_C; 925 926 /* this is a lazy thing */ 927 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 928 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 929 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 930 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 931 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 932 if (n_vertices) { 933 Mat B_V,S_VCT; 934 935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 936 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 937 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 938 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 939 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 940 } 941 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 942 } else { /* if there are no constraints, reset work */ 943 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 944 } 945 if (n_vertices && n_R) { 946 Mat A_VRT; 947 PetscScalar *marray; 948 PetscBLASInt B_N,B_one = 1; 949 950 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 951 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 952 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 953 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 954 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 955 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 956 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 957 } 958 959 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 960 for (i=0;i<pcbddc->local_primal_size;i++) { 961 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 962 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 963 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 964 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 965 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 966 } 967 } else { 968 for (i=0;i<pcbddc->local_primal_size;i++) { 969 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 970 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 971 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 972 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 973 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 974 } 975 } 976 /* coarse basis functions */ 977 for (i=0;i<pcbddc->local_primal_size;i++) { 978 PetscScalar *y; 979 980 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 981 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 982 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 983 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985 if (i<n_vertices) { 986 y[n_B*i+idx_V_B[i]] = 1.0; 987 } 988 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 989 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 990 991 if (pcbddc->switch_static || pcbddc->dbg_flag) { 992 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 993 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 994 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 995 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 996 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 997 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 998 } 999 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1000 } 1001 } 1002 /* free memory */ 1003 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1004 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1005 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1006 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1007 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1008 ierr = PetscFree(work);CHKERRQ(ierr); 1009 if (n_vertices) { 1010 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1011 } 1012 if (n_constraints) { 1013 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1014 } 1015 /* Checking coarse_sub_mat and coarse basis functios */ 1016 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1017 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1018 if (pcbddc->dbg_flag) { 1019 Mat coarse_sub_mat; 1020 Mat AUXMAT,TM1,TM2,TM3,TM4; 1021 Mat coarse_phi_D,coarse_phi_B; 1022 Mat coarse_psi_D,coarse_psi_B; 1023 Mat A_II,A_BB,A_IB,A_BI; 1024 Mat C_B,CPHI; 1025 IS is_dummy; 1026 Vec mones; 1027 MatType checkmattype=MATSEQAIJ; 1028 PetscReal real_value; 1029 1030 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1031 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1032 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1033 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1034 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1035 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1036 if (unsymmetric_check) { 1037 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1038 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1039 } 1040 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1041 1042 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1043 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1044 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1045 if (unsymmetric_check) { 1046 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1047 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1048 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1049 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1050 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1051 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 } else { 1059 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1060 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1061 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1063 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1064 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1066 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1067 } 1068 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1069 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1070 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1071 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1072 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1073 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1074 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1075 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1076 1077 /* check constraints */ 1078 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1079 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1080 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1081 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1082 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1083 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1084 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1085 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1086 if (unsymmetric_check) { 1087 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1088 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1089 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1090 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1092 } 1093 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1094 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1095 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1096 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1097 1098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1099 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1100 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1101 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1102 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1103 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1104 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1105 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1106 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1107 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1108 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1109 if (unsymmetric_check) { 1110 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1111 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1112 } 1113 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1114 } 1115 /* get back data */ 1116 *coarse_submat_vals_n = coarse_submat_vals; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1122 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1123 { 1124 Mat *work_mat; 1125 IS isrow_s,iscol_s; 1126 PetscBool rsorted,csorted; 1127 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1132 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1133 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1134 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1135 1136 if (!rsorted) { 1137 const PetscInt *idxs; 1138 PetscInt *idxs_sorted,i; 1139 1140 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1142 for (i=0;i<rsize;i++) { 1143 idxs_perm_r[i] = i; 1144 } 1145 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1146 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1147 for (i=0;i<rsize;i++) { 1148 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1149 } 1150 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1151 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1152 } else { 1153 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1154 isrow_s = isrow; 1155 } 1156 1157 if (!csorted) { 1158 if (isrow == iscol) { 1159 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1160 iscol_s = isrow_s; 1161 } else { 1162 const PetscInt *idxs; 1163 PetscInt *idxs_sorted,i; 1164 1165 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1166 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1167 for (i=0;i<csize;i++) { 1168 idxs_perm_c[i] = i; 1169 } 1170 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1171 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1172 for (i=0;i<csize;i++) { 1173 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1174 } 1175 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1176 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1177 } 1178 } else { 1179 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1180 iscol_s = iscol; 1181 } 1182 1183 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1184 1185 if (!rsorted || !csorted) { 1186 Mat new_mat; 1187 IS is_perm_r,is_perm_c; 1188 1189 if (!rsorted) { 1190 PetscInt *idxs_r,i; 1191 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1192 for (i=0;i<rsize;i++) { 1193 idxs_r[idxs_perm_r[i]] = i; 1194 } 1195 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1196 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1197 } else { 1198 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1199 } 1200 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1201 1202 if (!csorted) { 1203 if (isrow_s == iscol_s) { 1204 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1205 is_perm_c = is_perm_r; 1206 } else { 1207 PetscInt *idxs_c,i; 1208 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1209 for (i=0;i<csize;i++) { 1210 idxs_c[idxs_perm_c[i]] = i; 1211 } 1212 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1213 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1214 } 1215 } else { 1216 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1217 } 1218 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1219 1220 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1221 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1222 work_mat[0] = new_mat; 1223 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1224 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1225 } 1226 1227 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1228 *B = work_mat[0]; 1229 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1230 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1231 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1237 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1238 { 1239 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1240 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1241 Mat new_mat; 1242 IS is_local,is_global; 1243 PetscInt local_size; 1244 PetscBool isseqaij; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1249 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1250 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1251 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1252 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1253 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1254 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1255 1256 /* check */ 1257 if (pcbddc->dbg_flag) { 1258 Vec x,x_change; 1259 PetscReal error; 1260 1261 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1262 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1263 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1264 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1267 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1269 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1270 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1271 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1272 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1273 ierr = VecDestroy(&x);CHKERRQ(ierr); 1274 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1275 } 1276 1277 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1278 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1279 if (isseqaij) { 1280 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1281 } else { 1282 Mat work_mat; 1283 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1284 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1285 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1286 } 1287 if (matis->A->symmetric_set) { 1288 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1289 #if !defined(PETSC_USE_COMPLEX) 1290 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1291 #endif 1292 } 1293 /* 1294 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1295 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1296 */ 1297 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1298 PetscFunctionReturn(0); 1299 } 1300 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1303 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1304 { 1305 PC_IS* pcis = (PC_IS*)(pc->data); 1306 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1307 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1308 PetscInt *idx_R_local=NULL; 1309 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1310 PetscInt vbs,bs; 1311 PetscBT bitmask=NULL; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 /* 1316 No need to setup local scatters if 1317 - primal space is unchanged 1318 AND 1319 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1320 AND 1321 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1322 */ 1323 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1324 PetscFunctionReturn(0); 1325 } 1326 /* destroy old objects */ 1327 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1328 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1329 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1330 /* Set Non-overlapping dimensions */ 1331 n_B = pcis->n_B; 1332 n_D = pcis->n - n_B; 1333 n_vertices = pcbddc->n_vertices; 1334 1335 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1336 1337 /* create auxiliary bitmask and allocate workspace */ 1338 if (!sub_schurs->reuse_mumps) { 1339 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1340 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1341 for (i=0;i<n_vertices;i++) { 1342 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1343 } 1344 1345 for (i=0, n_R=0; i<pcis->n; i++) { 1346 if (!PetscBTLookup(bitmask,i)) { 1347 idx_R_local[n_R++] = i; 1348 } 1349 } 1350 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1351 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1352 1353 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1354 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1355 } 1356 1357 /* Block code */ 1358 vbs = 1; 1359 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1360 if (bs>1 && !(n_vertices%bs)) { 1361 PetscBool is_blocked = PETSC_TRUE; 1362 PetscInt *vary; 1363 if (!sub_schurs->reuse_mumps) { 1364 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1365 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1366 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1367 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1368 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1369 for (i=0; i<pcis->n/bs; i++) { 1370 if (vary[i]!=0 && vary[i]!=bs) { 1371 is_blocked = PETSC_FALSE; 1372 break; 1373 } 1374 } 1375 ierr = PetscFree(vary);CHKERRQ(ierr); 1376 } else { 1377 /* Verify directly the R set */ 1378 for (i=0; i<n_R/bs; i++) { 1379 PetscInt j,node=idx_R_local[bs*i]; 1380 for (j=1; j<bs; j++) { 1381 if (node != idx_R_local[bs*i+j]-j) { 1382 is_blocked = PETSC_FALSE; 1383 break; 1384 } 1385 } 1386 } 1387 } 1388 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1389 vbs = bs; 1390 for (i=0;i<n_R/vbs;i++) { 1391 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1392 } 1393 } 1394 } 1395 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1396 if (sub_schurs->reuse_mumps) { 1397 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1398 1399 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1400 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1401 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1402 reuse_mumps->is_R = pcbddc->is_R_local; 1403 } else { 1404 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1405 } 1406 1407 /* print some info if requested */ 1408 if (pcbddc->dbg_flag) { 1409 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1410 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1411 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1414 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1415 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1416 } 1417 1418 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1419 if (!sub_schurs->reuse_mumps) { 1420 IS is_aux1,is_aux2; 1421 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1422 1423 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1424 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1425 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1426 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1427 for (i=0; i<n_D; i++) { 1428 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1429 } 1430 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1431 for (i=0, j=0; i<n_R; i++) { 1432 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1433 aux_array1[j++] = i; 1434 } 1435 } 1436 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1437 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1438 for (i=0, j=0; i<n_B; i++) { 1439 if (!PetscBTLookup(bitmask,is_indices[i])) { 1440 aux_array2[j++] = i; 1441 } 1442 } 1443 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1444 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1445 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1446 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1447 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1448 1449 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1450 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1451 for (i=0, j=0; i<n_R; i++) { 1452 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1453 aux_array1[j++] = i; 1454 } 1455 } 1456 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1457 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1458 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1459 } 1460 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1461 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1462 } else { 1463 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1464 IS tis; 1465 PetscInt schur_size; 1466 1467 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1468 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1469 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1470 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1471 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1472 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1473 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1474 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1475 } 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1483 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1484 { 1485 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1486 PC_IS *pcis = (PC_IS*)pc->data; 1487 PC pc_temp; 1488 Mat A_RR; 1489 MatReuse reuse; 1490 PetscScalar m_one = -1.0; 1491 PetscReal value; 1492 PetscInt n_D,n_R; 1493 PetscBool use_exact,use_exact_reduced,issbaij; 1494 PetscErrorCode ierr; 1495 /* prefixes stuff */ 1496 char dir_prefix[256],neu_prefix[256],str_level[16]; 1497 size_t len; 1498 1499 PetscFunctionBegin; 1500 1501 /* compute prefixes */ 1502 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1503 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1504 if (!pcbddc->current_level) { 1505 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1506 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1507 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1508 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1509 } else { 1510 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1511 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1512 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1513 len -= 15; /* remove "pc_bddc_coarse_" */ 1514 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1515 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1516 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1517 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1518 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1519 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1520 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1521 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1522 } 1523 1524 /* DIRICHLET PROBLEM */ 1525 if (dirichlet) { 1526 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1527 if (pcbddc->local_mat->symmetric_set) { 1528 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1529 } 1530 /* Matrix for Dirichlet problem is pcis->A_II */ 1531 n_D = pcis->n - pcis->n_B; 1532 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1533 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1534 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1535 /* default */ 1536 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1537 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1538 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1539 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1540 if (issbaij) { 1541 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1542 } else { 1543 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1544 } 1545 /* Allow user's customization */ 1546 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1547 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1548 } 1549 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1550 if (sub_schurs->reuse_mumps) { 1551 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1552 1553 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1554 } 1555 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1556 if (!n_D) { 1557 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1558 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1559 } 1560 /* Set Up KSP for Dirichlet problem of BDDC */ 1561 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1562 /* set ksp_D into pcis data */ 1563 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1564 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1565 pcis->ksp_D = pcbddc->ksp_D; 1566 } 1567 1568 /* NEUMANN PROBLEM */ 1569 A_RR = 0; 1570 if (neumann) { 1571 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1572 PetscInt ibs,mbs; 1573 PetscBool issbaij; 1574 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1575 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1576 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1577 if (pcbddc->ksp_R) { /* already created ksp */ 1578 PetscInt nn_R; 1579 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1580 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1581 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1582 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1583 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1584 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1585 reuse = MAT_INITIAL_MATRIX; 1586 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1587 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1588 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1589 reuse = MAT_INITIAL_MATRIX; 1590 } else { /* safe to reuse the matrix */ 1591 reuse = MAT_REUSE_MATRIX; 1592 } 1593 } 1594 /* last check */ 1595 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1596 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1597 reuse = MAT_INITIAL_MATRIX; 1598 } 1599 } else { /* first time, so we need to create the matrix */ 1600 reuse = MAT_INITIAL_MATRIX; 1601 } 1602 /* extract A_RR */ 1603 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1604 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1605 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1606 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1607 if (matis->A == pcbddc->local_mat) { 1608 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1609 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1610 } else { 1611 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1612 } 1613 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1614 if (matis->A == pcbddc->local_mat) { 1615 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1616 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1617 } else { 1618 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1619 } 1620 } 1621 if (!sub_schurs->reuse_mumps) { 1622 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1623 if (pcbddc->local_mat->symmetric_set) { 1624 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1625 } 1626 } else { 1627 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1628 1629 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1630 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1631 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1632 } 1633 if (!pcbddc->ksp_R) { /* create object if not present */ 1634 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1635 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1636 /* default */ 1637 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1638 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1639 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1640 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1641 if (issbaij) { 1642 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1643 } else { 1644 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1645 } 1646 /* Allow user's customization */ 1647 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1648 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1649 } 1650 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1651 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1652 if (!n_R) { 1653 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1654 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1655 } 1656 /* Reuse MUMPS solver if it is present */ 1657 if (sub_schurs->reuse_mumps) { 1658 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1659 1660 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1661 } 1662 /* Set Up KSP for Neumann problem of BDDC */ 1663 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1664 } 1665 /* free Neumann problem's matrix */ 1666 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1667 1668 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1669 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1670 if (pcbddc->dbg_flag) { 1671 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1672 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1673 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1674 } 1675 if (dirichlet) { /* Dirichlet */ 1676 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1677 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1678 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1679 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1680 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1681 /* need to be adapted? */ 1682 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1683 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1684 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1685 /* print info */ 1686 if (pcbddc->dbg_flag) { 1687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1688 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1689 } 1690 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1691 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1692 } 1693 } 1694 if (neumann) { /* Neumann */ 1695 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1696 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1697 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1698 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1699 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1700 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1701 /* need to be adapted? */ 1702 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1703 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1704 /* print info */ 1705 if (pcbddc->dbg_flag) { 1706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1707 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1708 } 1709 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1710 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1711 } 1712 } 1713 } 1714 PetscFunctionReturn(0); 1715 } 1716 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1719 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1720 { 1721 PetscErrorCode ierr; 1722 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1723 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1724 1725 PetscFunctionBegin; 1726 if (!sub_schurs->reuse_mumps) { 1727 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1728 } 1729 if (!pcbddc->switch_static) { 1730 if (applytranspose && pcbddc->local_auxmat1) { 1731 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1732 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1733 } 1734 if (!sub_schurs->reuse_mumps) { 1735 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1736 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1737 } else { 1738 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1739 1740 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1741 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1742 } 1743 } else { 1744 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1745 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1746 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1747 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1748 if (applytranspose && pcbddc->local_auxmat1) { 1749 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1750 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1751 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1752 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1753 } 1754 } 1755 if (!sub_schurs->reuse_mumps) { 1756 if (applytranspose) { 1757 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1758 } else { 1759 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1760 } 1761 #if defined(PETSC_HAVE_MUMPS) 1762 } else { 1763 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1764 1765 if (applytranspose) { 1766 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1767 } else { 1768 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1769 } 1770 #endif 1771 } 1772 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1773 if (!pcbddc->switch_static) { 1774 if (!sub_schurs->reuse_mumps) { 1775 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1776 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 } else { 1778 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1779 1780 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1781 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1782 } 1783 if (!applytranspose && pcbddc->local_auxmat1) { 1784 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1785 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1786 } 1787 } else { 1788 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1789 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1790 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1791 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1792 if (!applytranspose && pcbddc->local_auxmat1) { 1793 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1794 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1795 } 1796 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1797 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1799 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1807 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1808 { 1809 PetscErrorCode ierr; 1810 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1811 PC_IS* pcis = (PC_IS*) (pc->data); 1812 const PetscScalar zero = 0.0; 1813 1814 PetscFunctionBegin; 1815 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1816 if (applytranspose) { 1817 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1818 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1819 } else { 1820 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1821 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1822 } 1823 /* start communications from local primal nodes to rhs of coarse solver */ 1824 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1825 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1826 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1827 1828 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1829 /* TODO remove null space when doing multilevel */ 1830 if (pcbddc->coarse_ksp) { 1831 Vec rhs,sol; 1832 1833 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1834 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1835 if (applytranspose) { 1836 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1837 } else { 1838 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1839 } 1840 } 1841 1842 /* Local solution on R nodes */ 1843 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1844 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1845 } 1846 1847 /* communications from coarse sol to local primal nodes */ 1848 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1849 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1850 1851 /* Sum contributions from two levels */ 1852 if (applytranspose) { 1853 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1854 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1855 } else { 1856 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1857 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1858 } 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1864 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1865 { 1866 PetscErrorCode ierr; 1867 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1868 PetscScalar *array; 1869 Vec from,to; 1870 1871 PetscFunctionBegin; 1872 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1873 from = pcbddc->coarse_vec; 1874 to = pcbddc->vec1_P; 1875 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1876 Vec tvec; 1877 1878 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1879 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1880 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1881 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1882 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1883 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1884 } 1885 } else { /* from local to global -> put data in coarse right hand side */ 1886 from = pcbddc->vec1_P; 1887 to = pcbddc->coarse_vec; 1888 } 1889 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1895 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1896 { 1897 PetscErrorCode ierr; 1898 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1899 PetscScalar *array; 1900 Vec from,to; 1901 1902 PetscFunctionBegin; 1903 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1904 from = pcbddc->coarse_vec; 1905 to = pcbddc->vec1_P; 1906 } else { /* from local to global -> put data in coarse right hand side */ 1907 from = pcbddc->vec1_P; 1908 to = pcbddc->coarse_vec; 1909 } 1910 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1911 if (smode == SCATTER_FORWARD) { 1912 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1913 Vec tvec; 1914 1915 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1916 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1917 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1918 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1919 } 1920 } else { 1921 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1922 ierr = VecResetArray(from);CHKERRQ(ierr); 1923 } 1924 } 1925 PetscFunctionReturn(0); 1926 } 1927 1928 /* uncomment for testing purposes */ 1929 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1932 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1933 { 1934 PetscErrorCode ierr; 1935 PC_IS* pcis = (PC_IS*)(pc->data); 1936 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1937 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1938 /* one and zero */ 1939 PetscScalar one=1.0,zero=0.0; 1940 /* space to store constraints and their local indices */ 1941 PetscScalar *constraints_data; 1942 PetscInt *constraints_idxs,*constraints_idxs_B; 1943 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1944 PetscInt *constraints_n; 1945 /* iterators */ 1946 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1947 /* BLAS integers */ 1948 PetscBLASInt lwork,lierr; 1949 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1950 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1951 /* reuse */ 1952 PetscInt olocal_primal_size,olocal_primal_size_cc; 1953 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1954 /* change of basis */ 1955 PetscBool qr_needed; 1956 PetscBT change_basis,qr_needed_idx; 1957 /* auxiliary stuff */ 1958 PetscInt *nnz,*is_indices; 1959 PetscInt ncc; 1960 /* some quantities */ 1961 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1962 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1963 1964 PetscFunctionBegin; 1965 /* Destroy Mat objects computed previously */ 1966 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1967 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1968 /* save info on constraints from previous setup (if any) */ 1969 olocal_primal_size = pcbddc->local_primal_size; 1970 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1971 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1972 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1973 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1974 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1975 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1976 1977 /* print some info */ 1978 if (pcbddc->dbg_flag) { 1979 IS vertices; 1980 PetscInt nv,nedges,nfaces; 1981 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1982 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1983 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1984 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1985 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1986 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1987 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1990 } 1991 1992 if (!pcbddc->adaptive_selection) { 1993 IS ISForVertices,*ISForFaces,*ISForEdges; 1994 MatNullSpace nearnullsp; 1995 const Vec *nearnullvecs; 1996 Vec *localnearnullsp; 1997 PetscScalar *array; 1998 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1999 PetscBool nnsp_has_cnst; 2000 /* LAPACK working arrays for SVD or POD */ 2001 PetscBool skip_lapack,boolforchange; 2002 PetscScalar *work; 2003 PetscReal *singular_vals; 2004 #if defined(PETSC_USE_COMPLEX) 2005 PetscReal *rwork; 2006 #endif 2007 #if defined(PETSC_MISSING_LAPACK_GESVD) 2008 PetscScalar *temp_basis,*correlation_mat; 2009 #else 2010 PetscBLASInt dummy_int=1; 2011 PetscScalar dummy_scalar=1.; 2012 #endif 2013 2014 /* Get index sets for faces, edges and vertices from graph */ 2015 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2016 /* free unneeded index sets */ 2017 if (!pcbddc->use_vertices) { 2018 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2019 } 2020 if (!pcbddc->use_edges) { 2021 for (i=0;i<n_ISForEdges;i++) { 2022 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2023 } 2024 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2025 n_ISForEdges = 0; 2026 } 2027 if (!pcbddc->use_faces) { 2028 for (i=0;i<n_ISForFaces;i++) { 2029 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2030 } 2031 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2032 n_ISForFaces = 0; 2033 } 2034 2035 #if defined(PETSC_USE_DEBUG) 2036 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2037 Also use_change_of_basis should be consistent among processors */ 2038 if (pcbddc->NullSpace) { 2039 PetscBool tbool[2],gbool[2]; 2040 2041 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2042 pcbddc->use_change_of_basis = PETSC_TRUE; 2043 if (!ISForEdges) { 2044 pcbddc->use_change_on_faces = PETSC_TRUE; 2045 } 2046 } 2047 tbool[0] = pcbddc->use_change_of_basis; 2048 tbool[1] = pcbddc->use_change_on_faces; 2049 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2050 pcbddc->use_change_of_basis = gbool[0]; 2051 pcbddc->use_change_on_faces = gbool[1]; 2052 } 2053 #endif 2054 2055 /* check if near null space is attached to global mat */ 2056 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2057 if (nearnullsp) { 2058 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2059 /* remove any stored info */ 2060 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2061 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2062 /* store information for BDDC solver reuse */ 2063 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2064 pcbddc->onearnullspace = nearnullsp; 2065 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2066 for (i=0;i<nnsp_size;i++) { 2067 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2068 } 2069 } else { /* if near null space is not provided BDDC uses constants by default */ 2070 nnsp_size = 0; 2071 nnsp_has_cnst = PETSC_TRUE; 2072 } 2073 /* get max number of constraints on a single cc */ 2074 max_constraints = nnsp_size; 2075 if (nnsp_has_cnst) max_constraints++; 2076 2077 /* 2078 Evaluate maximum storage size needed by the procedure 2079 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2080 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2081 There can be multiple constraints per connected component 2082 */ 2083 n_vertices = 0; 2084 if (ISForVertices) { 2085 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2086 } 2087 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2088 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2089 2090 total_counts = n_ISForFaces+n_ISForEdges; 2091 total_counts *= max_constraints; 2092 total_counts += n_vertices; 2093 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2094 2095 total_counts = 0; 2096 max_size_of_constraint = 0; 2097 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2098 IS used_is; 2099 if (i<n_ISForEdges) { 2100 used_is = ISForEdges[i]; 2101 } else { 2102 used_is = ISForFaces[i-n_ISForEdges]; 2103 } 2104 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2105 total_counts += j; 2106 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2107 } 2108 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2109 2110 /* get local part of global near null space vectors */ 2111 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2112 for (k=0;k<nnsp_size;k++) { 2113 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2114 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 } 2117 2118 /* whether or not to skip lapack calls */ 2119 skip_lapack = PETSC_TRUE; 2120 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2121 2122 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2123 if (!skip_lapack) { 2124 PetscScalar temp_work; 2125 2126 #if defined(PETSC_MISSING_LAPACK_GESVD) 2127 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2128 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2129 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2130 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2131 #if defined(PETSC_USE_COMPLEX) 2132 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2133 #endif 2134 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2135 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2136 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2137 lwork = -1; 2138 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2139 #if !defined(PETSC_USE_COMPLEX) 2140 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2141 #else 2142 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2143 #endif 2144 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2145 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2146 #else /* on missing GESVD */ 2147 /* SVD */ 2148 PetscInt max_n,min_n; 2149 max_n = max_size_of_constraint; 2150 min_n = max_constraints; 2151 if (max_size_of_constraint < max_constraints) { 2152 min_n = max_size_of_constraint; 2153 max_n = max_constraints; 2154 } 2155 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2156 #if defined(PETSC_USE_COMPLEX) 2157 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2158 #endif 2159 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2160 lwork = -1; 2161 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2162 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2163 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2164 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2165 #if !defined(PETSC_USE_COMPLEX) 2166 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2167 #else 2168 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2169 #endif 2170 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2171 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2172 #endif /* on missing GESVD */ 2173 /* Allocate optimal workspace */ 2174 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2175 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2176 } 2177 /* Now we can loop on constraining sets */ 2178 total_counts = 0; 2179 constraints_idxs_ptr[0] = 0; 2180 constraints_data_ptr[0] = 0; 2181 /* vertices */ 2182 if (n_vertices) { 2183 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2184 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2185 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2186 for (i=0;i<n_vertices;i++) { 2187 constraints_n[total_counts] = 1; 2188 constraints_data[total_counts] = 1.0; 2189 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2190 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2191 total_counts++; 2192 } 2193 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2194 PetscBool used_vertex; 2195 for (i=0;i<n_vertices;i++) { 2196 used_vertex = PETSC_FALSE; 2197 k = 0; 2198 while (!used_vertex && k<nnsp_size) { 2199 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2200 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2201 constraints_n[total_counts] = 1; 2202 constraints_idxs[total_counts] = is_indices[i]; 2203 constraints_data[total_counts] = 1.0; 2204 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2205 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2206 total_counts++; 2207 used_vertex = PETSC_TRUE; 2208 } 2209 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2210 k++; 2211 } 2212 } 2213 } 2214 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2215 n_vertices = total_counts; 2216 } 2217 2218 /* edges and faces */ 2219 total_counts_cc = total_counts; 2220 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2221 IS used_is; 2222 PetscBool idxs_copied = PETSC_FALSE; 2223 2224 if (ncc<n_ISForEdges) { 2225 used_is = ISForEdges[ncc]; 2226 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2227 } else { 2228 used_is = ISForFaces[ncc-n_ISForEdges]; 2229 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2230 } 2231 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2232 2233 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2234 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2235 /* change of basis should not be performed on local periodic nodes */ 2236 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2237 if (nnsp_has_cnst) { 2238 PetscScalar quad_value; 2239 2240 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2241 idxs_copied = PETSC_TRUE; 2242 2243 if (!pcbddc->use_nnsp_true) { 2244 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2245 } else { 2246 quad_value = 1.0; 2247 } 2248 for (j=0;j<size_of_constraint;j++) { 2249 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2250 } 2251 temp_constraints++; 2252 total_counts++; 2253 } 2254 for (k=0;k<nnsp_size;k++) { 2255 PetscReal real_value; 2256 PetscScalar *ptr_to_data; 2257 2258 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2259 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2260 for (j=0;j<size_of_constraint;j++) { 2261 ptr_to_data[j] = array[is_indices[j]]; 2262 } 2263 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2264 /* check if array is null on the connected component */ 2265 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2266 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2267 if (real_value > 0.0) { /* keep indices and values */ 2268 temp_constraints++; 2269 total_counts++; 2270 if (!idxs_copied) { 2271 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2272 idxs_copied = PETSC_TRUE; 2273 } 2274 } 2275 } 2276 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2277 valid_constraints = temp_constraints; 2278 if (!pcbddc->use_nnsp_true && temp_constraints) { 2279 if (temp_constraints == 1) { /* just normalize the constraint */ 2280 PetscScalar norm,*ptr_to_data; 2281 2282 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2283 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2284 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2285 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2286 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2287 } else { /* perform SVD */ 2288 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2289 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2290 2291 #if defined(PETSC_MISSING_LAPACK_GESVD) 2292 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2293 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2294 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2295 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2296 from that computed using LAPACKgesvd 2297 -> This is due to a different computation of eigenvectors in LAPACKheev 2298 -> The quality of the POD-computed basis will be the same */ 2299 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2300 /* Store upper triangular part of correlation matrix */ 2301 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2302 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2303 for (j=0;j<temp_constraints;j++) { 2304 for (k=0;k<j+1;k++) { 2305 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2306 } 2307 } 2308 /* compute eigenvalues and eigenvectors of correlation matrix */ 2309 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2310 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2311 #if !defined(PETSC_USE_COMPLEX) 2312 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2313 #else 2314 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2315 #endif 2316 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2317 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2318 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2319 j = 0; 2320 while (j < temp_constraints && singular_vals[j] < tol) j++; 2321 total_counts = total_counts-j; 2322 valid_constraints = temp_constraints-j; 2323 /* scale and copy POD basis into used quadrature memory */ 2324 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2325 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2326 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2327 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2330 if (j<temp_constraints) { 2331 PetscInt ii; 2332 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2333 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2334 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2335 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2336 for (k=0;k<temp_constraints-j;k++) { 2337 for (ii=0;ii<size_of_constraint;ii++) { 2338 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2339 } 2340 } 2341 } 2342 #else /* on missing GESVD */ 2343 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2344 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2345 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2346 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2347 #if !defined(PETSC_USE_COMPLEX) 2348 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2349 #else 2350 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2351 #endif 2352 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2353 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2354 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2355 k = temp_constraints; 2356 if (k > size_of_constraint) k = size_of_constraint; 2357 j = 0; 2358 while (j < k && singular_vals[k-j-1] < tol) j++; 2359 valid_constraints = k-j; 2360 total_counts = total_counts-temp_constraints+valid_constraints; 2361 #endif /* on missing GESVD */ 2362 } 2363 } 2364 /* update pointers information */ 2365 if (valid_constraints) { 2366 constraints_n[total_counts_cc] = valid_constraints; 2367 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2368 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2369 /* set change_of_basis flag */ 2370 if (boolforchange) { 2371 PetscBTSet(change_basis,total_counts_cc); 2372 } 2373 total_counts_cc++; 2374 } 2375 } 2376 /* free workspace */ 2377 if (!skip_lapack) { 2378 ierr = PetscFree(work);CHKERRQ(ierr); 2379 #if defined(PETSC_USE_COMPLEX) 2380 ierr = PetscFree(rwork);CHKERRQ(ierr); 2381 #endif 2382 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2383 #if defined(PETSC_MISSING_LAPACK_GESVD) 2384 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2385 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2386 #endif 2387 } 2388 for (k=0;k<nnsp_size;k++) { 2389 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2390 } 2391 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2392 /* free index sets of faces, edges and vertices */ 2393 for (i=0;i<n_ISForFaces;i++) { 2394 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2395 } 2396 if (n_ISForFaces) { 2397 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2398 } 2399 for (i=0;i<n_ISForEdges;i++) { 2400 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2401 } 2402 if (n_ISForEdges) { 2403 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2404 } 2405 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2406 } else { 2407 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2408 2409 total_counts = 0; 2410 n_vertices = 0; 2411 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2412 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2413 } 2414 max_constraints = 0; 2415 total_counts_cc = 0; 2416 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2417 total_counts += pcbddc->adaptive_constraints_n[i]; 2418 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2419 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2420 } 2421 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2422 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2423 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2424 constraints_data = pcbddc->adaptive_constraints_data; 2425 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2426 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2427 total_counts_cc = 0; 2428 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2429 if (pcbddc->adaptive_constraints_n[i]) { 2430 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2431 } 2432 } 2433 #if 0 2434 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2435 for (i=0;i<total_counts_cc;i++) { 2436 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2437 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2438 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2439 printf(" %d",constraints_idxs[j]); 2440 } 2441 printf("\n"); 2442 printf("number of cc: %d\n",constraints_n[i]); 2443 } 2444 for (i=0;i<n_vertices;i++) { 2445 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2446 } 2447 for (i=0;i<sub_schurs->n_subs;i++) { 2448 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2449 } 2450 #endif 2451 2452 max_size_of_constraint = 0; 2453 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2454 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2455 /* Change of basis */ 2456 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2457 if (pcbddc->use_change_of_basis) { 2458 for (i=0;i<sub_schurs->n_subs;i++) { 2459 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2460 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2461 } 2462 } 2463 } 2464 } 2465 pcbddc->local_primal_size = total_counts; 2466 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2467 2468 /* map constraints_idxs in boundary numbering */ 2469 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2470 if (i != constraints_idxs_ptr[total_counts_cc]) { 2471 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2472 } 2473 2474 /* Create constraint matrix */ 2475 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2476 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2477 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2478 2479 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2480 /* determine if a QR strategy is needed for change of basis */ 2481 qr_needed = PETSC_FALSE; 2482 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2483 total_primal_vertices=0; 2484 pcbddc->local_primal_size_cc = 0; 2485 for (i=0;i<total_counts_cc;i++) { 2486 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2487 if (size_of_constraint == 1) { 2488 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2489 pcbddc->local_primal_size_cc += 1; 2490 } else if (PetscBTLookup(change_basis,i)) { 2491 for (k=0;k<constraints_n[i];k++) { 2492 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2493 } 2494 pcbddc->local_primal_size_cc += constraints_n[i]; 2495 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2496 PetscBTSet(qr_needed_idx,i); 2497 qr_needed = PETSC_TRUE; 2498 } 2499 } else { 2500 pcbddc->local_primal_size_cc += 1; 2501 } 2502 } 2503 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2504 pcbddc->n_vertices = total_primal_vertices; 2505 /* permute indices in order to have a sorted set of vertices */ 2506 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2507 2508 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2509 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2510 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2511 2512 /* nonzero structure of constraint matrix */ 2513 /* and get reference dof for local constraints */ 2514 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2515 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2516 2517 j = total_primal_vertices; 2518 total_counts = total_primal_vertices; 2519 cum = total_primal_vertices; 2520 for (i=n_vertices;i<total_counts_cc;i++) { 2521 if (!PetscBTLookup(change_basis,i)) { 2522 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2523 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2524 cum++; 2525 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2526 for (k=0;k<constraints_n[i];k++) { 2527 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2528 nnz[j+k] = size_of_constraint; 2529 } 2530 j += constraints_n[i]; 2531 } 2532 } 2533 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2534 ierr = PetscFree(nnz);CHKERRQ(ierr); 2535 2536 /* set values in constraint matrix */ 2537 for (i=0;i<total_primal_vertices;i++) { 2538 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2539 } 2540 total_counts = total_primal_vertices; 2541 for (i=n_vertices;i<total_counts_cc;i++) { 2542 if (!PetscBTLookup(change_basis,i)) { 2543 PetscInt *cols; 2544 2545 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2546 cols = constraints_idxs+constraints_idxs_ptr[i]; 2547 for (k=0;k<constraints_n[i];k++) { 2548 PetscInt row = total_counts+k; 2549 PetscScalar *vals; 2550 2551 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2552 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2553 } 2554 total_counts += constraints_n[i]; 2555 } 2556 } 2557 /* assembling */ 2558 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2559 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2560 2561 /* 2562 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2563 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2564 */ 2565 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2566 if (pcbddc->use_change_of_basis) { 2567 /* dual and primal dofs on a single cc */ 2568 PetscInt dual_dofs,primal_dofs; 2569 /* working stuff for GEQRF */ 2570 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2571 PetscBLASInt lqr_work; 2572 /* working stuff for UNGQR */ 2573 PetscScalar *gqr_work,lgqr_work_t; 2574 PetscBLASInt lgqr_work; 2575 /* working stuff for TRTRS */ 2576 PetscScalar *trs_rhs; 2577 PetscBLASInt Blas_NRHS; 2578 /* pointers for values insertion into change of basis matrix */ 2579 PetscInt *start_rows,*start_cols; 2580 PetscScalar *start_vals; 2581 /* working stuff for values insertion */ 2582 PetscBT is_primal; 2583 PetscInt *aux_primal_numbering_B; 2584 /* matrix sizes */ 2585 PetscInt global_size,local_size; 2586 /* temporary change of basis */ 2587 Mat localChangeOfBasisMatrix; 2588 /* extra space for debugging */ 2589 PetscScalar *dbg_work; 2590 2591 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2592 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2593 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2594 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2595 /* nonzeros for local mat */ 2596 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2597 for (i=0;i<pcis->n;i++) nnz[i]=1; 2598 for (i=n_vertices;i<total_counts_cc;i++) { 2599 if (PetscBTLookup(change_basis,i)) { 2600 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2601 if (PetscBTLookup(qr_needed_idx,i)) { 2602 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2603 } else { 2604 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2605 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2606 } 2607 } 2608 } 2609 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2610 ierr = PetscFree(nnz);CHKERRQ(ierr); 2611 /* Set initial identity in the matrix */ 2612 for (i=0;i<pcis->n;i++) { 2613 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2614 } 2615 2616 if (pcbddc->dbg_flag) { 2617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2619 } 2620 2621 2622 /* Now we loop on the constraints which need a change of basis */ 2623 /* 2624 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2625 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2626 2627 Basic blocks of change of basis matrix T computed by 2628 2629 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2630 2631 | 1 0 ... 0 s_1/S | 2632 | 0 1 ... 0 s_2/S | 2633 | ... | 2634 | 0 ... 1 s_{n-1}/S | 2635 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2636 2637 with S = \sum_{i=1}^n s_i^2 2638 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2639 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2640 2641 - QR decomposition of constraints otherwise 2642 */ 2643 if (qr_needed) { 2644 /* space to store Q */ 2645 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2646 /* first we issue queries for optimal work */ 2647 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2648 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2649 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2650 lqr_work = -1; 2651 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2652 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2653 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2654 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2655 lgqr_work = -1; 2656 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2657 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2658 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2659 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2660 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2661 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2662 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2663 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2664 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2665 /* array to store scaling factors for reflectors */ 2666 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2667 /* array to store rhs and solution of triangular solver */ 2668 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2669 /* allocating workspace for check */ 2670 if (pcbddc->dbg_flag) { 2671 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2672 } 2673 } 2674 /* array to store whether a node is primal or not */ 2675 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2676 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2677 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2678 if (i != total_primal_vertices) { 2679 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2680 } 2681 for (i=0;i<total_primal_vertices;i++) { 2682 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2683 } 2684 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2685 2686 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2687 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2688 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2689 if (PetscBTLookup(change_basis,total_counts)) { 2690 /* get constraint info */ 2691 primal_dofs = constraints_n[total_counts]; 2692 dual_dofs = size_of_constraint-primal_dofs; 2693 2694 if (pcbddc->dbg_flag) { 2695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2696 } 2697 2698 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2699 2700 /* copy quadrature constraints for change of basis check */ 2701 if (pcbddc->dbg_flag) { 2702 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2703 } 2704 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2705 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2706 2707 /* compute QR decomposition of constraints */ 2708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2709 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2711 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2712 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2713 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2714 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2715 2716 /* explictly compute R^-T */ 2717 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2718 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2719 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2720 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2721 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2723 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2724 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2725 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2726 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2727 2728 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2729 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2733 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2734 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2735 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2736 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2737 2738 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2739 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2740 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2741 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2742 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2743 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2744 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2745 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2747 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2748 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2749 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2750 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2751 2752 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2753 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2754 /* insert cols for primal dofs */ 2755 for (j=0;j<primal_dofs;j++) { 2756 start_vals = &qr_basis[j*size_of_constraint]; 2757 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2758 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2759 } 2760 /* insert cols for dual dofs */ 2761 for (j=0,k=0;j<dual_dofs;k++) { 2762 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2763 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2764 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2765 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2766 j++; 2767 } 2768 } 2769 2770 /* check change of basis */ 2771 if (pcbddc->dbg_flag) { 2772 PetscInt ii,jj; 2773 PetscBool valid_qr=PETSC_TRUE; 2774 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2775 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2776 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2777 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2778 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2780 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2781 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2782 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2783 for (jj=0;jj<size_of_constraint;jj++) { 2784 for (ii=0;ii<primal_dofs;ii++) { 2785 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2786 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2787 } 2788 } 2789 if (!valid_qr) { 2790 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2791 for (jj=0;jj<size_of_constraint;jj++) { 2792 for (ii=0;ii<primal_dofs;ii++) { 2793 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2794 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2795 } 2796 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2797 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2798 } 2799 } 2800 } 2801 } else { 2802 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2803 } 2804 } 2805 } else { /* simple transformation block */ 2806 PetscInt row,col; 2807 PetscScalar val,norm; 2808 2809 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2810 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2811 for (j=0;j<size_of_constraint;j++) { 2812 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2813 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2814 if (!PetscBTLookup(is_primal,row_B)) { 2815 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2816 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2817 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2818 } else { 2819 for (k=0;k<size_of_constraint;k++) { 2820 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2821 if (row != col) { 2822 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2823 } else { 2824 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2825 } 2826 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2827 } 2828 } 2829 } 2830 if (pcbddc->dbg_flag) { 2831 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2832 } 2833 } 2834 } else { 2835 if (pcbddc->dbg_flag) { 2836 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2837 } 2838 } 2839 } 2840 2841 /* free workspace */ 2842 if (qr_needed) { 2843 if (pcbddc->dbg_flag) { 2844 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2845 } 2846 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2847 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2848 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2849 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2850 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2851 } 2852 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2853 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 2856 /* assembling of global change of variable */ 2857 { 2858 Mat tmat; 2859 PetscInt bs; 2860 2861 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2862 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2863 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2864 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2865 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2866 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2867 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2868 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2869 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2870 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2871 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2872 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2873 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2874 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2875 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2876 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2877 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2878 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2879 } 2880 /* check */ 2881 if (pcbddc->dbg_flag) { 2882 PetscReal error; 2883 Vec x,x_change; 2884 2885 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2886 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2887 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2888 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2889 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2890 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2891 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2892 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2893 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2894 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2895 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2896 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2897 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2898 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2899 ierr = VecDestroy(&x);CHKERRQ(ierr); 2900 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2901 } 2902 2903 /* adapt sub_schurs computed (if any) */ 2904 if (pcbddc->use_deluxe_scaling) { 2905 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2906 if (sub_schurs->S_Ej_all) { 2907 Mat S_new,tmat; 2908 IS is_all_N; 2909 2910 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2911 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2912 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2913 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2914 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2915 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2916 sub_schurs->S_Ej_all = S_new; 2917 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2918 if (sub_schurs->sum_S_Ej_all) { 2919 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2920 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2921 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2922 sub_schurs->sum_S_Ej_all = S_new; 2923 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2924 } 2925 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2926 } 2927 } 2928 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2929 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2930 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2931 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2932 } 2933 2934 /* set up change of basis context */ 2935 if (pcbddc->ChangeOfBasisMatrix) { 2936 PCBDDCChange_ctx change_ctx; 2937 2938 if (!pcbddc->new_global_mat) { 2939 PetscInt global_size,local_size; 2940 2941 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2942 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2943 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2944 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2945 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2946 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2947 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2948 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2949 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2950 } else { 2951 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2952 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2953 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2954 } 2955 if (!pcbddc->user_ChangeOfBasisMatrix) { 2956 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2957 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2958 } else { 2959 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2960 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2961 } 2962 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2963 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2964 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2965 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 } 2967 2968 /* check if a new primal space has been introduced */ 2969 pcbddc->new_primal_space_local = PETSC_TRUE; 2970 if (olocal_primal_size == pcbddc->local_primal_size) { 2971 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2972 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2973 if (!pcbddc->new_primal_space_local) { 2974 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2975 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2976 } 2977 } 2978 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2979 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2980 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2981 2982 /* flush dbg viewer */ 2983 if (pcbddc->dbg_flag) { 2984 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2985 } 2986 2987 /* free workspace */ 2988 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2989 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2990 if (!pcbddc->adaptive_selection) { 2991 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2992 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2993 } else { 2994 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2995 pcbddc->adaptive_constraints_idxs_ptr, 2996 pcbddc->adaptive_constraints_data_ptr, 2997 pcbddc->adaptive_constraints_idxs, 2998 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2999 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3000 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3001 } 3002 PetscFunctionReturn(0); 3003 } 3004 3005 #undef __FUNCT__ 3006 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3007 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3008 { 3009 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3010 PC_IS *pcis = (PC_IS*)pc->data; 3011 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3012 PetscInt ierr,i,vertex_size,N; 3013 PetscViewer viewer=pcbddc->dbg_viewer; 3014 3015 PetscFunctionBegin; 3016 /* Reset previously computed graph */ 3017 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3018 /* Init local Graph struct */ 3019 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3020 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3021 3022 /* Check validity of the csr graph passed in by the user */ 3023 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3024 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3025 } 3026 3027 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3028 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3029 PetscInt *xadj,*adjncy; 3030 PetscInt nvtxs; 3031 PetscBool flg_row=PETSC_FALSE; 3032 3033 if (pcbddc->use_local_adj) { 3034 3035 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3036 if (flg_row) { 3037 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3038 pcbddc->computed_rowadj = PETSC_TRUE; 3039 } 3040 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3041 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3042 IS is_dummy; 3043 ISLocalToGlobalMapping l2gmap_dummy; 3044 PetscInt j,sum; 3045 PetscInt *cxadj,*cadjncy; 3046 const PetscInt *idxs; 3047 PCBDDCGraph graph; 3048 PetscBT is_on_boundary; 3049 3050 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3051 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3052 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3053 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3054 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3055 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3056 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3057 if (flg_row) { 3058 graph->xadj = xadj; 3059 graph->adjncy = adjncy; 3060 } 3061 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3062 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3063 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3064 3065 if (pcbddc->dbg_flag) { 3066 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3067 for (i=0;i<graph->ncc;i++) { 3068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3069 } 3070 } 3071 3072 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3073 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3074 for (i=0;i<pcis->n_B;i++) { 3075 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3076 } 3077 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3078 3079 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3080 sum = 0; 3081 for (i=0;i<graph->ncc;i++) { 3082 PetscInt sizecc = 0; 3083 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3084 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3085 sizecc++; 3086 } 3087 } 3088 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3089 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3090 cxadj[graph->queue[j]] = sizecc; 3091 } 3092 } 3093 sum += sizecc*sizecc; 3094 } 3095 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3096 sum = 0; 3097 for (i=0;i<pcis->n;i++) { 3098 PetscInt temp = cxadj[i]; 3099 cxadj[i] = sum; 3100 sum += temp; 3101 } 3102 cxadj[pcis->n] = sum; 3103 for (i=0;i<graph->ncc;i++) { 3104 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3105 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3106 PetscInt k,sizecc = 0; 3107 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3108 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3109 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3110 sizecc++; 3111 } 3112 } 3113 } 3114 } 3115 } 3116 if (sum) { 3117 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3118 } else { 3119 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3120 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3121 } 3122 graph->xadj = 0; 3123 graph->adjncy = 0; 3124 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3125 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3126 } 3127 } 3128 if (pcbddc->dbg_flag) { 3129 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3130 } 3131 3132 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3133 vertex_size = 1; 3134 if (pcbddc->user_provided_isfordofs) { 3135 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3136 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3137 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3138 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3139 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3140 } 3141 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3142 pcbddc->n_ISForDofs = 0; 3143 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3144 } 3145 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3146 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3147 } else { 3148 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3149 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3150 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3151 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3152 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3153 } 3154 } 3155 } 3156 3157 /* Setup of Graph */ 3158 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3159 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3160 } 3161 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3162 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3163 } 3164 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3165 3166 /* Graph's connected components analysis */ 3167 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3168 3169 /* print some info to stdout */ 3170 if (pcbddc->dbg_flag) { 3171 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3172 } 3173 3174 /* mark topography has done */ 3175 pcbddc->recompute_topography = PETSC_FALSE; 3176 PetscFunctionReturn(0); 3177 } 3178 3179 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3180 #undef __FUNCT__ 3181 #define __FUNCT__ "PCBDDCSubsetNumbering" 3182 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3183 { 3184 PetscSF sf; 3185 PetscLayout map; 3186 const PetscInt *idxs; 3187 PetscInt *leaf_data,*root_data,*gidxs; 3188 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3189 PetscInt n_n,nlocals,start,first_index; 3190 PetscMPIInt commsize; 3191 PetscBool first_found; 3192 PetscErrorCode ierr; 3193 3194 PetscFunctionBegin; 3195 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3196 if (subset_mult) { 3197 PetscCheckSameComm(subset,1,subset_mult,2); 3198 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3199 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3200 } 3201 /* create workspace layout for computing global indices of subset */ 3202 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3203 lbounds[0] = lbounds[1] = 0; 3204 for (i=0;i<n;i++) { 3205 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3206 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3207 } 3208 lbounds[0] = -lbounds[0]; 3209 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3210 gbounds[0] = -gbounds[0]; 3211 N = gbounds[1] - gbounds[0] + 1; 3212 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3213 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3214 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3215 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3216 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3217 3218 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3219 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3220 if (subset_mult) { 3221 const PetscInt* idxs_mult; 3222 3223 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3224 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3225 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3226 } else { 3227 for (i=0;i<n;i++) leaf_data[i] = 1; 3228 } 3229 /* local size of new subset */ 3230 n_n = 0; 3231 for (i=0;i<n;i++) n_n += leaf_data[i]; 3232 3233 /* global indexes in layout */ 3234 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3235 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3236 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3237 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3238 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3239 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3240 3241 /* reduce from leaves to roots */ 3242 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3243 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3244 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3245 3246 /* count indexes in local part of layout */ 3247 nlocals = 0; 3248 first_index = -1; 3249 first_found = PETSC_FALSE; 3250 for (i=0;i<Nl;i++) { 3251 if (!first_found && root_data[i]) { 3252 first_found = PETSC_TRUE; 3253 first_index = i; 3254 } 3255 nlocals += root_data[i]; 3256 } 3257 3258 /* cumulative of number of indexes and size of subset without holes */ 3259 start = 0; 3260 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3261 if (N_n) { /* compute total size of new subset if requested */ 3262 *N_n = start + nlocals; 3263 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3264 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3265 } 3266 /* adapt roof data with cumulative */ 3267 if (first_found) { 3268 PetscInt old_index; 3269 3270 root_data[first_index] += start; 3271 old_index = first_index; 3272 for (i=first_index+1;i<Nl;i++) { 3273 if (root_data[i]) { 3274 root_data[i] += root_data[old_index]; 3275 old_index = i; 3276 } 3277 } 3278 } 3279 3280 /* from roots to leaves */ 3281 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3282 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3283 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3284 3285 /* create new IS with global indexes without holes */ 3286 if (subset_mult) { 3287 const PetscInt* idxs_mult; 3288 PetscInt cum; 3289 3290 cum = 0; 3291 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3292 for (i=0;i<n;i++) { 3293 PetscInt j; 3294 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3295 } 3296 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3297 } else { 3298 for (i=0;i<n;i++) { 3299 gidxs[i] = leaf_data[i]-1; 3300 } 3301 } 3302 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3303 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3304 PetscFunctionReturn(0); 3305 } 3306 3307 #undef __FUNCT__ 3308 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3309 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3310 { 3311 PetscInt i,j; 3312 PetscScalar *alphas; 3313 PetscErrorCode ierr; 3314 3315 PetscFunctionBegin; 3316 /* this implements stabilized Gram-Schmidt */ 3317 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3318 for (i=0;i<n;i++) { 3319 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3320 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3321 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3322 } 3323 ierr = PetscFree(alphas);CHKERRQ(ierr); 3324 PetscFunctionReturn(0); 3325 } 3326 3327 #undef __FUNCT__ 3328 #define __FUNCT__ "MatISGetSubassemblingPattern" 3329 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3330 { 3331 Mat subdomain_adj; 3332 IS new_ranks,ranks_send_to; 3333 MatPartitioning partitioner; 3334 Mat_IS *matis; 3335 PetscInt n_neighs,*neighs,*n_shared,**shared; 3336 PetscInt prank; 3337 PetscMPIInt size,rank,color; 3338 PetscInt *xadj,*adjncy,*oldranks; 3339 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3340 PetscInt i,local_size,threshold=0; 3341 PetscErrorCode ierr; 3342 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3343 PetscSubcomm subcomm; 3344 3345 PetscFunctionBegin; 3346 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3347 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3348 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3349 3350 /* Get info on mapping */ 3351 matis = (Mat_IS*)(mat->data); 3352 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3353 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3354 3355 /* build local CSR graph of subdomains' connectivity */ 3356 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3357 xadj[0] = 0; 3358 xadj[1] = PetscMax(n_neighs-1,0); 3359 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3360 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3361 3362 if (threshold) { 3363 PetscInt xadj_count = 0; 3364 for (i=1;i<n_neighs;i++) { 3365 if (n_shared[i] > threshold) { 3366 adjncy[xadj_count] = neighs[i]; 3367 adjncy_wgt[xadj_count] = n_shared[i]; 3368 xadj_count++; 3369 } 3370 } 3371 xadj[1] = xadj_count; 3372 } else { 3373 if (xadj[1]) { 3374 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3375 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3376 } 3377 } 3378 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3379 if (use_square) { 3380 for (i=0;i<xadj[1];i++) { 3381 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3382 } 3383 } 3384 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3385 3386 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3387 3388 /* 3389 Restrict work on active processes only. 3390 */ 3391 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3392 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3393 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3394 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3395 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3396 if (color) { 3397 ierr = PetscFree(xadj);CHKERRQ(ierr); 3398 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3399 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3400 } else { 3401 PetscInt coarsening_ratio; 3402 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3403 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3404 prank = rank; 3405 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3406 /* 3407 for (i=0;i<size;i++) { 3408 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3409 } 3410 */ 3411 for (i=0;i<xadj[1];i++) { 3412 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3413 } 3414 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3415 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3416 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3417 3418 /* Partition */ 3419 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3420 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3421 if (use_vwgt) { 3422 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3423 v_wgt[0] = local_size; 3424 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3425 } 3426 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3427 coarsening_ratio = size/n_subdomains; 3428 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3429 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3430 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3431 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3432 3433 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3434 if (contiguous) { 3435 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3436 } else { 3437 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3438 } 3439 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3440 /* clean up */ 3441 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3442 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3443 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3444 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3445 } 3446 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3447 3448 /* assemble parallel IS for sends */ 3449 i = 1; 3450 if (color) i=0; 3451 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3452 3453 /* get back IS */ 3454 *is_sends = ranks_send_to; 3455 PetscFunctionReturn(0); 3456 } 3457 3458 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3459 3460 #undef __FUNCT__ 3461 #define __FUNCT__ "MatISSubassemble" 3462 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3463 { 3464 Mat local_mat; 3465 Mat_IS *matis; 3466 IS is_sends_internal; 3467 PetscInt rows,cols,new_local_rows; 3468 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3469 PetscBool ismatis,isdense,newisdense,destroy_mat; 3470 ISLocalToGlobalMapping l2gmap; 3471 PetscInt* l2gmap_indices; 3472 const PetscInt* is_indices; 3473 MatType new_local_type; 3474 /* buffers */ 3475 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3476 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3477 PetscInt *recv_buffer_idxs_local; 3478 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3479 /* MPI */ 3480 MPI_Comm comm,comm_n; 3481 PetscSubcomm subcomm; 3482 PetscMPIInt n_sends,n_recvs,commsize; 3483 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3484 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3485 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3486 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3487 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3488 PetscErrorCode ierr; 3489 3490 PetscFunctionBegin; 3491 /* TODO: add missing checks */ 3492 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3493 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3494 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3495 PetscValidLogicalCollectiveInt(mat,nis,7); 3496 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3497 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3498 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3499 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3500 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3501 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3502 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3503 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3504 PetscInt mrows,mcols,mnrows,mncols; 3505 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3506 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3507 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3508 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3509 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3510 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3511 } 3512 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3513 PetscValidLogicalCollectiveInt(mat,bs,0); 3514 /* prepare IS for sending if not provided */ 3515 if (!is_sends) { 3516 PetscBool pcontig = PETSC_TRUE; 3517 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3518 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3519 } else { 3520 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3521 is_sends_internal = is_sends; 3522 } 3523 3524 /* get pointer of MATIS data */ 3525 matis = (Mat_IS*)mat->data; 3526 3527 /* get comm */ 3528 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3529 3530 /* compute number of sends */ 3531 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3532 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3533 3534 /* compute number of receives */ 3535 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3536 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3537 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3538 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3539 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3540 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3541 ierr = PetscFree(iflags);CHKERRQ(ierr); 3542 3543 /* restrict comm if requested */ 3544 subcomm = 0; 3545 destroy_mat = PETSC_FALSE; 3546 if (restrict_comm) { 3547 PetscMPIInt color,subcommsize; 3548 3549 color = 0; 3550 if (restrict_full) { 3551 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3552 } else { 3553 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3554 } 3555 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3556 subcommsize = commsize - subcommsize; 3557 /* check if reuse has been requested */ 3558 if (reuse == MAT_REUSE_MATRIX) { 3559 if (*mat_n) { 3560 PetscMPIInt subcommsize2; 3561 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3562 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3563 comm_n = PetscObjectComm((PetscObject)*mat_n); 3564 } else { 3565 comm_n = PETSC_COMM_SELF; 3566 } 3567 } else { /* MAT_INITIAL_MATRIX */ 3568 PetscMPIInt rank; 3569 3570 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3571 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3572 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3573 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3574 comm_n = PetscSubcommChild(subcomm); 3575 } 3576 /* flag to destroy *mat_n if not significative */ 3577 if (color) destroy_mat = PETSC_TRUE; 3578 } else { 3579 comm_n = comm; 3580 } 3581 3582 /* prepare send/receive buffers */ 3583 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3584 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3585 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3586 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3587 if (nis) { 3588 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3589 } 3590 3591 /* Get data from local matrices */ 3592 if (!isdense) { 3593 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3594 /* TODO: See below some guidelines on how to prepare the local buffers */ 3595 /* 3596 send_buffer_vals should contain the raw values of the local matrix 3597 send_buffer_idxs should contain: 3598 - MatType_PRIVATE type 3599 - PetscInt size_of_l2gmap 3600 - PetscInt global_row_indices[size_of_l2gmap] 3601 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3602 */ 3603 } else { 3604 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3605 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3606 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3607 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3608 send_buffer_idxs[1] = i; 3609 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3610 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3611 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3612 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3613 for (i=0;i<n_sends;i++) { 3614 ilengths_vals[is_indices[i]] = len*len; 3615 ilengths_idxs[is_indices[i]] = len+2; 3616 } 3617 } 3618 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3619 /* additional is (if any) */ 3620 if (nis) { 3621 PetscMPIInt psum; 3622 PetscInt j; 3623 for (j=0,psum=0;j<nis;j++) { 3624 PetscInt plen; 3625 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3626 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3627 psum += len+1; /* indices + lenght */ 3628 } 3629 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3630 for (j=0,psum=0;j<nis;j++) { 3631 PetscInt plen; 3632 const PetscInt *is_array_idxs; 3633 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3634 send_buffer_idxs_is[psum] = plen; 3635 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3636 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3637 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3638 psum += plen+1; /* indices + lenght */ 3639 } 3640 for (i=0;i<n_sends;i++) { 3641 ilengths_idxs_is[is_indices[i]] = psum; 3642 } 3643 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3644 } 3645 3646 buf_size_idxs = 0; 3647 buf_size_vals = 0; 3648 buf_size_idxs_is = 0; 3649 for (i=0;i<n_recvs;i++) { 3650 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3651 buf_size_vals += (PetscInt)olengths_vals[i]; 3652 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3653 } 3654 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3655 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3656 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3657 3658 /* get new tags for clean communications */ 3659 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3660 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3661 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3662 3663 /* allocate for requests */ 3664 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3665 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3666 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3667 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3668 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3669 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3670 3671 /* communications */ 3672 ptr_idxs = recv_buffer_idxs; 3673 ptr_vals = recv_buffer_vals; 3674 ptr_idxs_is = recv_buffer_idxs_is; 3675 for (i=0;i<n_recvs;i++) { 3676 source_dest = onodes[i]; 3677 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3678 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3679 ptr_idxs += olengths_idxs[i]; 3680 ptr_vals += olengths_vals[i]; 3681 if (nis) { 3682 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3683 ptr_idxs_is += olengths_idxs_is[i]; 3684 } 3685 } 3686 for (i=0;i<n_sends;i++) { 3687 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3688 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3689 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3690 if (nis) { 3691 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3692 } 3693 } 3694 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3695 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3696 3697 /* assemble new l2g map */ 3698 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3699 ptr_idxs = recv_buffer_idxs; 3700 new_local_rows = 0; 3701 for (i=0;i<n_recvs;i++) { 3702 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3703 ptr_idxs += olengths_idxs[i]; 3704 } 3705 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3706 ptr_idxs = recv_buffer_idxs; 3707 new_local_rows = 0; 3708 for (i=0;i<n_recvs;i++) { 3709 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3710 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3711 ptr_idxs += olengths_idxs[i]; 3712 } 3713 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3714 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3715 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3716 3717 /* infer new local matrix type from received local matrices type */ 3718 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3719 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3720 if (n_recvs) { 3721 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3722 ptr_idxs = recv_buffer_idxs; 3723 for (i=0;i<n_recvs;i++) { 3724 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3725 new_local_type_private = MATAIJ_PRIVATE; 3726 break; 3727 } 3728 ptr_idxs += olengths_idxs[i]; 3729 } 3730 switch (new_local_type_private) { 3731 case MATDENSE_PRIVATE: 3732 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3733 new_local_type = MATSEQAIJ; 3734 bs = 1; 3735 } else { /* if I receive only 1 dense matrix */ 3736 new_local_type = MATSEQDENSE; 3737 bs = 1; 3738 } 3739 break; 3740 case MATAIJ_PRIVATE: 3741 new_local_type = MATSEQAIJ; 3742 bs = 1; 3743 break; 3744 case MATBAIJ_PRIVATE: 3745 new_local_type = MATSEQBAIJ; 3746 break; 3747 case MATSBAIJ_PRIVATE: 3748 new_local_type = MATSEQSBAIJ; 3749 break; 3750 default: 3751 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3752 break; 3753 } 3754 } else { /* by default, new_local_type is seqdense */ 3755 new_local_type = MATSEQDENSE; 3756 bs = 1; 3757 } 3758 3759 /* create MATIS object if needed */ 3760 if (reuse == MAT_INITIAL_MATRIX) { 3761 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3762 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3763 } else { 3764 /* it also destroys the local matrices */ 3765 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3766 } 3767 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3768 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3769 3770 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3771 3772 /* Global to local map of received indices */ 3773 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3774 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3775 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3776 3777 /* restore attributes -> type of incoming data and its size */ 3778 buf_size_idxs = 0; 3779 for (i=0;i<n_recvs;i++) { 3780 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3781 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3782 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3783 } 3784 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3785 3786 /* set preallocation */ 3787 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3788 if (!newisdense) { 3789 PetscInt *new_local_nnz=0; 3790 3791 ptr_vals = recv_buffer_vals; 3792 ptr_idxs = recv_buffer_idxs_local; 3793 if (n_recvs) { 3794 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3795 } 3796 for (i=0;i<n_recvs;i++) { 3797 PetscInt j; 3798 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3799 for (j=0;j<*(ptr_idxs+1);j++) { 3800 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3801 } 3802 } else { 3803 /* TODO */ 3804 } 3805 ptr_idxs += olengths_idxs[i]; 3806 } 3807 if (new_local_nnz) { 3808 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3809 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3810 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3811 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3812 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3813 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3814 } else { 3815 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3816 } 3817 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3818 } else { 3819 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3820 } 3821 3822 /* set values */ 3823 ptr_vals = recv_buffer_vals; 3824 ptr_idxs = recv_buffer_idxs_local; 3825 for (i=0;i<n_recvs;i++) { 3826 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3827 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3828 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3829 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3830 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3831 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3832 } else { 3833 /* TODO */ 3834 } 3835 ptr_idxs += olengths_idxs[i]; 3836 ptr_vals += olengths_vals[i]; 3837 } 3838 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3839 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3840 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3841 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3842 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3843 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3844 3845 #if 0 3846 if (!restrict_comm) { /* check */ 3847 Vec lvec,rvec; 3848 PetscReal infty_error; 3849 3850 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3851 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3852 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3853 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3854 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3855 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3856 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3857 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3858 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3859 } 3860 #endif 3861 3862 /* assemble new additional is (if any) */ 3863 if (nis) { 3864 PetscInt **temp_idxs,*count_is,j,psum; 3865 3866 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3867 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3868 ptr_idxs = recv_buffer_idxs_is; 3869 psum = 0; 3870 for (i=0;i<n_recvs;i++) { 3871 for (j=0;j<nis;j++) { 3872 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3873 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3874 psum += plen; 3875 ptr_idxs += plen+1; /* shift pointer to received data */ 3876 } 3877 } 3878 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3879 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3880 for (i=1;i<nis;i++) { 3881 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3882 } 3883 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3884 ptr_idxs = recv_buffer_idxs_is; 3885 for (i=0;i<n_recvs;i++) { 3886 for (j=0;j<nis;j++) { 3887 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3888 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3889 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3890 ptr_idxs += plen+1; /* shift pointer to received data */ 3891 } 3892 } 3893 for (i=0;i<nis;i++) { 3894 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3895 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3896 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3897 } 3898 ierr = PetscFree(count_is);CHKERRQ(ierr); 3899 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3900 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3901 } 3902 /* free workspace */ 3903 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3904 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3905 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3906 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3907 if (isdense) { 3908 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3909 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3910 } else { 3911 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3912 } 3913 if (nis) { 3914 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3915 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3916 } 3917 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3918 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3919 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3920 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3921 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3922 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3923 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3924 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3925 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3926 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3927 ierr = PetscFree(onodes);CHKERRQ(ierr); 3928 if (nis) { 3929 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3930 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3931 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3932 } 3933 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3934 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3935 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3936 for (i=0;i<nis;i++) { 3937 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3938 } 3939 *mat_n = NULL; 3940 } 3941 PetscFunctionReturn(0); 3942 } 3943 3944 /* temporary hack into ksp private data structure */ 3945 #include <petsc/private/kspimpl.h> 3946 3947 #undef __FUNCT__ 3948 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3949 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3950 { 3951 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3952 PC_IS *pcis = (PC_IS*)pc->data; 3953 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3954 MatNullSpace CoarseNullSpace=NULL; 3955 ISLocalToGlobalMapping coarse_islg; 3956 IS coarse_is,*isarray; 3957 PetscInt i,im_active=-1,active_procs=-1; 3958 PetscInt nis,nisdofs,nisneu; 3959 PC pc_temp; 3960 PCType coarse_pc_type; 3961 KSPType coarse_ksp_type; 3962 PetscBool multilevel_requested,multilevel_allowed; 3963 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3964 Mat t_coarse_mat_is; 3965 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3966 PetscMPIInt all_procs; 3967 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3968 PetscBool compute_vecs = PETSC_FALSE; 3969 PetscScalar *array; 3970 PetscErrorCode ierr; 3971 3972 PetscFunctionBegin; 3973 /* Assign global numbering to coarse dofs */ 3974 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3975 PetscInt ocoarse_size; 3976 compute_vecs = PETSC_TRUE; 3977 ocoarse_size = pcbddc->coarse_size; 3978 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3979 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3980 /* see if we can avoid some work */ 3981 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3982 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3983 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3984 coarse_reuse = PETSC_FALSE; 3985 } else { /* we can safely reuse already computed coarse matrix */ 3986 coarse_reuse = PETSC_TRUE; 3987 } 3988 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3989 coarse_reuse = PETSC_FALSE; 3990 } 3991 /* reset any subassembling information */ 3992 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3993 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3994 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3995 coarse_reuse = PETSC_TRUE; 3996 } 3997 3998 /* count "active" (i.e. with positive local size) and "void" processes */ 3999 im_active = !!(pcis->n); 4000 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4001 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4002 void_procs = all_procs-active_procs; 4003 csin_type_simple = PETSC_TRUE; 4004 redist = PETSC_FALSE; 4005 if (pcbddc->current_level && void_procs) { 4006 csin_ml = PETSC_TRUE; 4007 ncoarse_ml = void_procs; 4008 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4009 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4010 csin_ds = PETSC_TRUE; 4011 ncoarse_ds = pcbddc->redistribute_coarse; 4012 redist = PETSC_TRUE; 4013 } else { 4014 csin_ds = PETSC_TRUE; 4015 ncoarse_ds = active_procs; 4016 redist = PETSC_TRUE; 4017 } 4018 } else { 4019 csin_ml = PETSC_FALSE; 4020 ncoarse_ml = all_procs; 4021 if (void_procs) { 4022 csin_ds = PETSC_TRUE; 4023 ncoarse_ds = void_procs; 4024 csin_type_simple = PETSC_FALSE; 4025 } else { 4026 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4027 csin_ds = PETSC_TRUE; 4028 ncoarse_ds = pcbddc->redistribute_coarse; 4029 redist = PETSC_TRUE; 4030 } else { 4031 csin_ds = PETSC_FALSE; 4032 ncoarse_ds = all_procs; 4033 } 4034 } 4035 } 4036 4037 /* 4038 test if we can go multilevel: three conditions must be satisfied: 4039 - we have not exceeded the number of levels requested 4040 - we can actually subassemble the active processes 4041 - we can find a suitable number of MPI processes where we can place the subassembled problem 4042 */ 4043 multilevel_allowed = PETSC_FALSE; 4044 multilevel_requested = PETSC_FALSE; 4045 if (pcbddc->current_level < pcbddc->max_levels) { 4046 multilevel_requested = PETSC_TRUE; 4047 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4048 multilevel_allowed = PETSC_FALSE; 4049 } else { 4050 multilevel_allowed = PETSC_TRUE; 4051 } 4052 } 4053 /* determine number of process partecipating to coarse solver */ 4054 if (multilevel_allowed) { 4055 ncoarse = ncoarse_ml; 4056 csin = csin_ml; 4057 redist = PETSC_FALSE; 4058 } else { 4059 ncoarse = ncoarse_ds; 4060 csin = csin_ds; 4061 } 4062 4063 /* creates temporary l2gmap and IS for coarse indexes */ 4064 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4065 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4066 4067 /* creates temporary MATIS object for coarse matrix */ 4068 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4069 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4070 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4071 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4072 #if 0 4073 { 4074 PetscViewer viewer; 4075 char filename[256]; 4076 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4077 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4078 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4079 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4080 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4081 } 4082 #endif 4083 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4084 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4085 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4086 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4087 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4088 4089 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4090 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4091 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4092 const PetscInt *idxs; 4093 ISLocalToGlobalMapping tmap; 4094 4095 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4096 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4097 /* allocate space for temporary storage */ 4098 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4099 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4100 /* allocate for IS array */ 4101 nisdofs = pcbddc->n_ISForDofsLocal; 4102 nisneu = !!pcbddc->NeumannBoundariesLocal; 4103 nis = nisdofs + nisneu; 4104 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4105 /* dofs splitting */ 4106 for (i=0;i<nisdofs;i++) { 4107 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4108 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4109 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4110 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4111 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4112 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4113 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4114 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4115 } 4116 /* neumann boundaries */ 4117 if (pcbddc->NeumannBoundariesLocal) { 4118 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4119 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4120 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4121 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4122 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4123 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4124 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4125 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4126 } 4127 /* free memory */ 4128 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4129 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4130 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4131 } else { 4132 nis = 0; 4133 nisdofs = 0; 4134 nisneu = 0; 4135 isarray = NULL; 4136 } 4137 /* destroy no longer needed map */ 4138 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4139 4140 /* restrict on coarse candidates (if needed) */ 4141 coarse_mat_is = NULL; 4142 if (csin) { 4143 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4144 if (redist) { 4145 PetscMPIInt rank; 4146 PetscInt spc,n_spc_p1,dest[1],destsize; 4147 4148 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4149 spc = active_procs/ncoarse; 4150 n_spc_p1 = active_procs%ncoarse; 4151 if (im_active) { 4152 destsize = 1; 4153 if (rank > n_spc_p1*(spc+1)-1) { 4154 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4155 } else { 4156 dest[0] = rank/(spc+1); 4157 } 4158 } else { 4159 destsize = 0; 4160 } 4161 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4162 } else if (csin_type_simple) { 4163 PetscMPIInt rank; 4164 PetscInt issize,isidx; 4165 4166 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4167 if (im_active) { 4168 issize = 1; 4169 isidx = (PetscInt)rank; 4170 } else { 4171 issize = 0; 4172 isidx = -1; 4173 } 4174 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4175 } else { /* get a suitable subassembling pattern from MATIS code */ 4176 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4177 } 4178 4179 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4180 if (!redist || ncoarse <= void_procs) { 4181 PetscInt ncoarse_cand,tissize,*nisindices; 4182 PetscInt *coarse_candidates; 4183 const PetscInt* tisindices; 4184 4185 /* get coarse candidates' ranks in pc communicator */ 4186 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4187 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4188 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4189 if (!coarse_candidates[i]) { 4190 coarse_candidates[ncoarse_cand++]=i; 4191 } 4192 } 4193 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4194 4195 4196 if (pcbddc->dbg_flag) { 4197 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4198 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4199 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4200 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4201 for (i=0;i<ncoarse_cand;i++) { 4202 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4203 } 4204 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4205 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4206 } 4207 /* shift the pattern on coarse candidates */ 4208 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4209 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4210 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4211 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4212 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4213 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4214 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4215 } 4216 if (pcbddc->dbg_flag) { 4217 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4218 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4219 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4220 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4221 } 4222 } 4223 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4224 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4225 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4226 } else { /* this is the last level, so use just receiving processes in subcomm */ 4227 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4228 } 4229 } else { 4230 if (pcbddc->dbg_flag) { 4231 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4232 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4233 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4234 } 4235 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4236 coarse_mat_is = t_coarse_mat_is; 4237 } 4238 4239 /* create local to global scatters for coarse problem */ 4240 if (compute_vecs) { 4241 PetscInt lrows; 4242 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4243 if (coarse_mat_is) { 4244 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4245 } else { 4246 lrows = 0; 4247 } 4248 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4249 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4250 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4251 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4252 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4253 } 4254 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4255 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4256 4257 /* set defaults for coarse KSP and PC */ 4258 if (multilevel_allowed) { 4259 coarse_ksp_type = KSPRICHARDSON; 4260 coarse_pc_type = PCBDDC; 4261 } else { 4262 coarse_ksp_type = KSPPREONLY; 4263 coarse_pc_type = PCREDUNDANT; 4264 } 4265 4266 /* print some info if requested */ 4267 if (pcbddc->dbg_flag) { 4268 if (!multilevel_allowed) { 4269 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4270 if (multilevel_requested) { 4271 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4272 } else if (pcbddc->max_levels) { 4273 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4274 } 4275 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4276 } 4277 } 4278 4279 /* create the coarse KSP object only once with defaults */ 4280 if (coarse_mat_is) { 4281 MatReuse coarse_mat_reuse; 4282 PetscViewer dbg_viewer = NULL; 4283 if (pcbddc->dbg_flag) { 4284 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4285 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4286 } 4287 if (!pcbddc->coarse_ksp) { 4288 char prefix[256],str_level[16]; 4289 size_t len; 4290 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4291 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4292 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4293 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4294 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4295 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4296 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4297 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4298 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4299 /* prefix */ 4300 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4301 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4302 if (!pcbddc->current_level) { 4303 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4304 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4305 } else { 4306 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4307 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4308 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4309 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4310 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4311 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4312 } 4313 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4314 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4315 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4316 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4317 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4318 /* allow user customization */ 4319 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4320 } 4321 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4322 if (nisdofs) { 4323 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4324 for (i=0;i<nisdofs;i++) { 4325 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4326 } 4327 } 4328 if (nisneu) { 4329 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4330 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4331 } 4332 4333 /* get some info after set from options */ 4334 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4335 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4336 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4337 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4338 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4339 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4340 isbddc = PETSC_FALSE; 4341 } 4342 if (isredundant) { 4343 KSP inner_ksp; 4344 PC inner_pc; 4345 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4346 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4347 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4348 } 4349 4350 /* assemble coarse matrix */ 4351 if (coarse_reuse) { 4352 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4353 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4354 coarse_mat_reuse = MAT_REUSE_MATRIX; 4355 } else { 4356 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4357 } 4358 if (isbddc || isnn) { 4359 if (pcbddc->coarsening_ratio > 1) { 4360 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4361 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4362 if (pcbddc->dbg_flag) { 4363 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4364 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4365 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4366 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4367 } 4368 } 4369 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4370 } else { 4371 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4372 coarse_mat = coarse_mat_is; 4373 } 4374 } else { 4375 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4376 } 4377 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4378 4379 /* propagate symmetry info of coarse matrix */ 4380 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4381 if (pc->pmat->symmetric_set) { 4382 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4383 } 4384 if (pc->pmat->hermitian_set) { 4385 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4386 } 4387 if (pc->pmat->spd_set) { 4388 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4389 } 4390 /* set operators */ 4391 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4392 if (pcbddc->dbg_flag) { 4393 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4394 } 4395 } else { /* processes non partecipating to coarse solver (if any) */ 4396 coarse_mat = 0; 4397 } 4398 ierr = PetscFree(isarray);CHKERRQ(ierr); 4399 #if 0 4400 { 4401 PetscViewer viewer; 4402 char filename[256]; 4403 sprintf(filename,"coarse_mat.m"); 4404 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4405 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4406 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4407 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4408 } 4409 #endif 4410 4411 /* Compute coarse null space (special handling by BDDC only) */ 4412 if (pcbddc->NullSpace) { 4413 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4414 } 4415 4416 if (pcbddc->coarse_ksp) { 4417 Vec crhs,csol; 4418 PetscBool ispreonly; 4419 4420 if (CoarseNullSpace) { 4421 if (isbddc) { 4422 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4423 } else { 4424 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4425 } 4426 } 4427 /* setup coarse ksp */ 4428 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4429 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4430 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4431 /* hack */ 4432 if (!csol) { 4433 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4434 } 4435 if (!crhs) { 4436 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4437 } 4438 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4439 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4440 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4441 KSP check_ksp; 4442 KSPType check_ksp_type; 4443 PC check_pc; 4444 Vec check_vec,coarse_vec; 4445 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4446 PetscInt its; 4447 PetscBool compute_eigs; 4448 PetscReal *eigs_r,*eigs_c; 4449 PetscInt neigs; 4450 const char *prefix; 4451 4452 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4453 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4454 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4455 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4456 if (ispreonly) { 4457 check_ksp_type = KSPPREONLY; 4458 compute_eigs = PETSC_FALSE; 4459 } else { 4460 check_ksp_type = KSPGMRES; 4461 compute_eigs = PETSC_TRUE; 4462 } 4463 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4464 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4465 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4466 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4467 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4468 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4469 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4470 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4471 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4472 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4473 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4474 /* create random vec */ 4475 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4476 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4477 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4478 if (CoarseNullSpace) { 4479 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4480 } 4481 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4482 /* solve coarse problem */ 4483 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4484 if (CoarseNullSpace) { 4485 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4486 } 4487 /* set eigenvalue estimation if preonly has not been requested */ 4488 if (compute_eigs) { 4489 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4490 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4491 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4492 lambda_max = eigs_r[neigs-1]; 4493 lambda_min = eigs_r[0]; 4494 if (pcbddc->use_coarse_estimates) { 4495 if (lambda_max>lambda_min) { 4496 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4497 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4498 } 4499 } 4500 } 4501 4502 /* check coarse problem residual error */ 4503 if (pcbddc->dbg_flag) { 4504 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4505 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4506 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4507 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4508 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4509 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4510 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4511 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4512 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4513 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4514 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4515 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4516 if (compute_eigs) { 4517 PetscReal lambda_max_s,lambda_min_s; 4518 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4519 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4520 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4521 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4522 for (i=0;i<neigs;i++) { 4523 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4524 } 4525 } 4526 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4527 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4528 } 4529 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4530 if (compute_eigs) { 4531 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4532 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4533 } 4534 } 4535 } 4536 /* print additional info */ 4537 if (pcbddc->dbg_flag) { 4538 /* waits until all processes reaches this point */ 4539 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4540 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4541 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4542 } 4543 4544 /* free memory */ 4545 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4546 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4547 PetscFunctionReturn(0); 4548 } 4549 4550 #undef __FUNCT__ 4551 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4552 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4553 { 4554 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4555 PC_IS* pcis = (PC_IS*)pc->data; 4556 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4557 IS subset,subset_mult,subset_n; 4558 PetscInt local_size,coarse_size=0; 4559 PetscInt *local_primal_indices=NULL; 4560 const PetscInt *t_local_primal_indices; 4561 PetscErrorCode ierr; 4562 4563 PetscFunctionBegin; 4564 /* Compute global number of coarse dofs */ 4565 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4566 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4567 } 4568 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4569 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,subset_n,&subset);CHKERRQ(ierr); 4570 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4571 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4572 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4573 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4574 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4575 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4576 if (local_size != pcbddc->local_primal_size) { 4577 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4578 } 4579 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4580 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4581 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4582 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4583 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4584 4585 /* check numbering */ 4586 if (pcbddc->dbg_flag) { 4587 PetscScalar coarsesum,*array; 4588 PetscInt i; 4589 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4590 4591 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4592 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4593 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4594 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4595 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4596 for (i=0;i<pcbddc->local_primal_size;i++) { 4597 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4598 } 4599 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4600 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4601 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4602 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4603 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4604 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4605 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4606 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4607 for (i=0;i<pcis->n;i++) { 4608 if (array[i] == 1.0) { 4609 set_error = PETSC_TRUE; 4610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4611 } 4612 } 4613 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4614 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4615 for (i=0;i<pcis->n;i++) { 4616 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4617 } 4618 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4619 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4620 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4621 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4622 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4623 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4624 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4625 PetscInt *gidxs; 4626 4627 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4628 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4629 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4630 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4632 for (i=0;i<pcbddc->local_primal_size;i++) { 4633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4634 } 4635 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4636 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4637 } 4638 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4639 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4640 } 4641 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4642 /* get back data */ 4643 *coarse_size_n = coarse_size; 4644 *local_primal_indices_n = local_primal_indices; 4645 PetscFunctionReturn(0); 4646 } 4647 4648 #undef __FUNCT__ 4649 #define __FUNCT__ "PCBDDCGlobalToLocal" 4650 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4651 { 4652 IS localis_t; 4653 PetscInt i,lsize,*idxs,n; 4654 PetscScalar *vals; 4655 PetscErrorCode ierr; 4656 4657 PetscFunctionBegin; 4658 /* get indices in local ordering exploiting local to global map */ 4659 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4660 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4661 for (i=0;i<lsize;i++) vals[i] = 1.0; 4662 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4663 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4664 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4665 if (idxs) { /* multilevel guard */ 4666 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4667 } 4668 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4669 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4670 ierr = PetscFree(vals);CHKERRQ(ierr); 4671 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4672 /* now compute set in local ordering */ 4673 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4674 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4675 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4676 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4677 for (i=0,lsize=0;i<n;i++) { 4678 if (PetscRealPart(vals[i]) > 0.5) { 4679 lsize++; 4680 } 4681 } 4682 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4683 for (i=0,lsize=0;i<n;i++) { 4684 if (PetscRealPart(vals[i]) > 0.5) { 4685 idxs[lsize++] = i; 4686 } 4687 } 4688 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4689 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4690 *localis = localis_t; 4691 PetscFunctionReturn(0); 4692 } 4693 4694 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4695 #undef __FUNCT__ 4696 #define __FUNCT__ "PCBDDCMatMult_Private" 4697 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4698 { 4699 PCBDDCChange_ctx change_ctx; 4700 PetscErrorCode ierr; 4701 4702 PetscFunctionBegin; 4703 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4704 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4705 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4706 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4707 PetscFunctionReturn(0); 4708 } 4709 4710 #undef __FUNCT__ 4711 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4712 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4713 { 4714 PCBDDCChange_ctx change_ctx; 4715 PetscErrorCode ierr; 4716 4717 PetscFunctionBegin; 4718 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4719 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4720 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4721 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4722 PetscFunctionReturn(0); 4723 } 4724 4725 #undef __FUNCT__ 4726 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4727 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4728 { 4729 PC_IS *pcis=(PC_IS*)pc->data; 4730 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4731 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4732 Mat S_j; 4733 PetscInt *used_xadj,*used_adjncy; 4734 PetscBool free_used_adj; 4735 PetscErrorCode ierr; 4736 4737 PetscFunctionBegin; 4738 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4739 free_used_adj = PETSC_FALSE; 4740 if (pcbddc->sub_schurs_layers == -1) { 4741 used_xadj = NULL; 4742 used_adjncy = NULL; 4743 } else { 4744 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4745 used_xadj = pcbddc->mat_graph->xadj; 4746 used_adjncy = pcbddc->mat_graph->adjncy; 4747 } else if (pcbddc->computed_rowadj) { 4748 used_xadj = pcbddc->mat_graph->xadj; 4749 used_adjncy = pcbddc->mat_graph->adjncy; 4750 } else { 4751 PetscBool flg_row=PETSC_FALSE; 4752 const PetscInt *xadj,*adjncy; 4753 PetscInt nvtxs; 4754 4755 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4756 if (flg_row) { 4757 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4758 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4759 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4760 free_used_adj = PETSC_TRUE; 4761 } else { 4762 pcbddc->sub_schurs_layers = -1; 4763 used_xadj = NULL; 4764 used_adjncy = NULL; 4765 } 4766 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4767 } 4768 } 4769 4770 /* setup sub_schurs data */ 4771 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4772 if (!sub_schurs->use_mumps) { 4773 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4774 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4775 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4776 } else { 4777 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4778 PetscBool isseqaij; 4779 if (!pcbddc->use_vertices && reuse_solvers) { 4780 PetscInt n_vertices; 4781 4782 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4783 reuse_solvers = (PetscBool)!n_vertices; 4784 } 4785 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4786 if (!isseqaij) { 4787 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4788 if (matis->A == pcbddc->local_mat) { 4789 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4790 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4791 } else { 4792 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4793 } 4794 } 4795 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4796 } 4797 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4798 4799 /* free adjacency */ 4800 if (free_used_adj) { 4801 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4802 } 4803 PetscFunctionReturn(0); 4804 } 4805 4806 #undef __FUNCT__ 4807 #define __FUNCT__ "PCBDDCInitSubSchurs" 4808 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4809 { 4810 PC_IS *pcis=(PC_IS*)pc->data; 4811 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4812 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4813 PCBDDCGraph graph; 4814 PetscErrorCode ierr; 4815 4816 PetscFunctionBegin; 4817 /* attach interface graph for determining subsets */ 4818 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4819 IS verticesIS,verticescomm; 4820 PetscInt vsize,*idxs; 4821 4822 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4823 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4824 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4825 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4826 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4827 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4828 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4829 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4830 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4831 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4832 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4833 /* 4834 if (pcbddc->dbg_flag) { 4835 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4836 } 4837 */ 4838 } else { 4839 graph = pcbddc->mat_graph; 4840 } 4841 4842 /* sub_schurs init */ 4843 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4844 4845 /* free graph struct */ 4846 if (pcbddc->sub_schurs_rebuild) { 4847 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4848 } 4849 PetscFunctionReturn(0); 4850 } 4851