1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 #if defined(PETSC_USE_COMPLEX) 92 rwork = NULL; 93 #endif 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 155 const PetscInt *idxs; 156 PetscReal infty = PETSC_MAX_REAL; 157 PetscInt j,subset_size,eigs_start = 0; 158 PetscBLASInt B_N; 159 PetscBool same_data = PETSC_FALSE; 160 161 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 162 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 163 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 164 if (sub_schurs->is_hermitian) { 165 PetscInt j,k; 166 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 167 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 168 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 for (k=j;k<subset_size;k++) { 172 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 173 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 174 } 175 } 176 } else { 177 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 178 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 179 } 180 } else { 181 S = Sarray + cumarray; 182 St = Starray + cumarray; 183 } 184 185 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 186 /* see if we can save some work */ 187 if (sub_schurs->n_subs == 1) { 188 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 189 } 190 191 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 192 B_neigs = 0; 193 } else { 194 /* Threshold: this is an heuristic for edges */ 195 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 196 197 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 198 PetscBLASInt B_itype = 1; 199 PetscBLASInt B_IL, B_IU; 200 PetscReal eps = -1.0; /* dlamch? */ 201 PetscInt nmin_s; 202 203 /* ask for eigenvalues larger than thresh */ 204 if (pcbddc->dbg_flag) { 205 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 206 } 207 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 208 #if defined(PETSC_USE_COMPLEX) 209 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 210 #else 211 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 212 #endif 213 ierr = PetscFPTrapPop();CHKERRQ(ierr); 214 if (B_ierr) { 215 if (B_ierr < 0 ) { 216 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 217 } else if (B_ierr <= B_N) { 218 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 219 } else { 220 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 221 } 222 } 223 224 if (B_neigs > nmax) { 225 if (pcbddc->dbg_flag) { 226 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 227 } 228 eigs_start = B_neigs -nmax; 229 B_neigs = nmax; 230 } 231 232 nmin_s = PetscMin(nmin,B_N); 233 if (B_neigs < nmin_s) { 234 PetscBLASInt B_neigs2; 235 236 B_IU = B_N - B_neigs; 237 B_IL = B_N - nmin_s + 1; 238 if (pcbddc->dbg_flag) { 239 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 240 } 241 if (sub_schurs->is_hermitian) { 242 PetscInt j; 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 for (j=0;j<subset_size;j++) { 247 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 248 } 249 } else { 250 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 251 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 } 253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 254 #if defined(PETSC_USE_COMPLEX) 255 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 256 #else 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 258 #endif 259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 260 B_neigs += B_neigs2; 261 } 262 if (B_ierr) { 263 if (B_ierr < 0 ) { 264 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 265 } else if (B_ierr <= B_N) { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 267 } else { 268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 269 } 270 } 271 if (pcbddc->dbg_flag) { 272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 273 for (j=0;j<B_neigs;j++) { 274 if (eigs[j] == 0.0) { 275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 276 } else { 277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 278 } 279 } 280 } 281 } else { 282 /* TODO */ 283 } 284 } 285 maxneigs = PetscMax(B_neigs,maxneigs); 286 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 287 if (B_neigs) { 288 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 289 290 if (pcbddc->dbg_flag > 1) { 291 PetscInt ii; 292 for (ii=0;ii<B_neigs;ii++) { 293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 294 for (j=0;j<B_N;j++) { 295 #if defined(PETSC_USE_COMPLEX) 296 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 297 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 298 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 299 #else 300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 301 #endif 302 } 303 } 304 } 305 #if 0 306 for (j=0;j<B_neigs;j++) { 307 PetscBLASInt Blas_N,Blas_one = 1.0; 308 PetscScalar norm; 309 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 310 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 311 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 312 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 313 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 314 } else { 315 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 316 } 317 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 318 } 319 #endif 320 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 321 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 322 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 323 cum++; 324 } 325 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 326 /* shift for next computation */ 327 cumarray += subset_size*subset_size; 328 } 329 if (pcbddc->dbg_flag) { 330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 331 } 332 333 if (mss) { 334 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 335 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 336 /* destroy matrices (junk) */ 337 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 338 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 339 } 340 if (allocated_S_St) { 341 ierr = PetscFree2(S,St);CHKERRQ(ierr); 342 } 343 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 344 #if defined(PETSC_USE_COMPLEX) 345 ierr = PetscFree(rwork);CHKERRQ(ierr); 346 #endif 347 if (pcbddc->dbg_flag) { 348 PetscInt maxneigs_r; 349 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 350 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 351 } 352 PetscFunctionReturn(0); 353 } 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCBDDCSetUpSolvers" 357 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 358 { 359 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 360 PetscScalar *coarse_submat_vals; 361 PetscErrorCode ierr; 362 363 PetscFunctionBegin; 364 /* Setup local scatters R_to_B and (optionally) R_to_D */ 365 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 366 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 367 368 /* Setup local neumann solver ksp_R */ 369 /* PCBDDCSetUpLocalScatters should be called first! */ 370 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 371 372 /* Change global null space passed in by the user if change of basis has been requested */ 373 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 374 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 375 } 376 377 /* 378 Setup local correction and local part of coarse basis. 379 Gives back the dense local part of the coarse matrix in column major ordering 380 */ 381 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 382 383 /* Compute total number of coarse nodes and setup coarse solver */ 384 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 385 386 /* free */ 387 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "PCBDDCResetCustomization" 393 PetscErrorCode PCBDDCResetCustomization(PC pc) 394 { 395 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 401 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 402 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 403 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 405 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 406 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 407 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 408 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 409 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetTopography" 415 PetscErrorCode PCBDDCResetTopography(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 422 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 423 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 424 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 425 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCResetSolvers" 431 PetscErrorCode PCBDDCResetSolvers(PC pc) 432 { 433 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *array; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 439 if (pcbddc->coarse_phi_B) { 440 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 441 ierr = PetscFree(array);CHKERRQ(ierr); 442 } 443 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 445 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 447 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 449 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 450 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 451 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 452 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 454 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 455 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 456 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 457 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 458 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 459 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 461 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 462 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 463 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 464 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 465 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 471 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 472 { 473 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 474 PC_IS *pcis = (PC_IS*)pc->data; 475 VecType impVecType; 476 PetscInt n_constraints,n_R,old_size; 477 PetscErrorCode ierr; 478 479 PetscFunctionBegin; 480 if (!pcbddc->ConstraintMatrix) { 481 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 482 } 483 /* get sizes */ 484 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 485 n_R = pcis->n-pcbddc->n_vertices; 486 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 487 /* local work vectors (try to avoid unneeded work)*/ 488 /* R nodes */ 489 old_size = -1; 490 if (pcbddc->vec1_R) { 491 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 492 } 493 if (n_R != old_size) { 494 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 495 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 496 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 497 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 498 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 499 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 500 } 501 /* local primal dofs */ 502 old_size = -1; 503 if (pcbddc->vec1_P) { 504 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 505 } 506 if (pcbddc->local_primal_size != old_size) { 507 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 508 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 509 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 510 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 511 } 512 /* local explicit constraints */ 513 old_size = -1; 514 if (pcbddc->vec1_C) { 515 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 516 } 517 if (n_constraints && n_constraints != old_size) { 518 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 519 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 520 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 521 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 522 } 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "PCBDDCSetUpCorrection" 528 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 529 { 530 PetscErrorCode ierr; 531 /* pointers to pcis and pcbddc */ 532 PC_IS* pcis = (PC_IS*)pc->data; 533 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 534 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 535 /* submatrices of local problem */ 536 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 537 /* submatrices of local coarse problem */ 538 Mat S_VV,S_CV,S_VC,S_CC; 539 /* working matrices */ 540 Mat C_CR; 541 /* additional working stuff */ 542 PC pc_R; 543 Mat F; 544 PetscBool isLU,isCHOL,isILU; 545 546 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 547 PetscScalar *work; 548 PetscInt *idx_V_B; 549 PetscInt n,n_vertices,n_constraints; 550 PetscInt i,n_R,n_D,n_B; 551 PetscBool unsymmetric_check; 552 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 553 MatType impMatType; 554 /* some shortcuts to scalars */ 555 PetscScalar one=1.0,m_one=-1.0; 556 557 PetscFunctionBegin; 558 n_vertices = pcbddc->n_vertices; 559 n_constraints = pcbddc->local_primal_size-n_vertices; 560 /* Set Non-overlapping dimensions */ 561 n_B = pcis->n_B; 562 n_D = pcis->n - n_B; 563 n_R = pcis->n - n_vertices; 564 565 /* Set types for local objects needed by BDDC precondtioner */ 566 impMatType = MATSEQDENSE; 567 568 /* vertices in boundary numbering */ 569 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 570 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 571 if (i != n_vertices) { 572 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 573 } 574 575 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 576 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 577 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 578 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 580 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 581 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 582 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 583 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 584 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 585 586 unsymmetric_check = PETSC_FALSE; 587 /* allocate workspace */ 588 n = 0; 589 if (n_constraints) { 590 n += n_R*n_constraints; 591 } 592 if (n_vertices) { 593 n = PetscMax(2*n_R*n_vertices,n); 594 n = PetscMax((n_R+n_B)*n_vertices,n); 595 } 596 if (!pcbddc->symmetric_primal) { 597 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 598 unsymmetric_check = PETSC_TRUE; 599 } 600 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 601 602 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 603 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 604 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 605 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 606 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 607 if (isLU || isILU || isCHOL) { 608 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 609 } else if (sub_schurs->reuse_mumps) { 610 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 611 MatFactorType type; 612 613 F = reuse_mumps->F; 614 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 615 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 616 } else { 617 F = NULL; 618 } 619 620 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 621 if (n_constraints) { 622 Mat M1,M2,M3; 623 Mat auxmat; 624 IS is_aux; 625 PetscScalar *array,*array2; 626 627 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 628 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 629 630 /* Extract constraints on R nodes: C_{CR} */ 631 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 632 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 633 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 634 635 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 636 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 637 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 638 for (i=0;i<n_constraints;i++) { 639 const PetscScalar *row_cmat_values; 640 const PetscInt *row_cmat_indices; 641 PetscInt size_of_constraint,j; 642 643 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 644 for (j=0;j<size_of_constraint;j++) { 645 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 646 } 647 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 648 } 649 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 650 if (F) { 651 Mat B; 652 653 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 654 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 655 ierr = MatDestroy(&B);CHKERRQ(ierr); 656 } else { 657 PetscScalar *marr; 658 659 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 660 for (i=0;i<n_constraints;i++) { 661 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 662 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 663 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 664 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 665 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 666 } 667 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 668 } 669 if (!pcbddc->switch_static) { 670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 671 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 672 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 673 for (i=0;i<n_constraints;i++) { 674 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 675 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 676 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 677 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 678 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 679 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 680 } 681 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 682 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 683 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 684 } else { 685 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 686 pcbddc->local_auxmat2 = local_auxmat2_R; 687 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 688 } 689 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 690 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 691 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 692 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 693 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 694 if (isCHOL) { 695 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 696 } else { 697 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 698 } 699 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 700 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 701 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 702 ierr = MatDestroy(&M2);CHKERRQ(ierr); 703 ierr = MatDestroy(&M3);CHKERRQ(ierr); 704 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 705 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 706 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 707 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 708 ierr = MatDestroy(&M1);CHKERRQ(ierr); 709 } 710 /* Get submatrices from subdomain matrix */ 711 if (n_vertices) { 712 IS is_aux; 713 714 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 715 IS tis; 716 717 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 718 ierr = ISSort(tis);CHKERRQ(ierr); 719 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 720 ierr = ISDestroy(&tis);CHKERRQ(ierr); 721 } else { 722 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 723 } 724 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 725 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 726 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 727 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 728 } 729 730 /* Matrix of coarse basis functions (local) */ 731 if (pcbddc->coarse_phi_B) { 732 PetscInt on_B,on_primal,on_D=n_D; 733 if (pcbddc->coarse_phi_D) { 734 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 735 } 736 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 737 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 738 PetscScalar *marray; 739 740 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 741 ierr = PetscFree(marray);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 743 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 744 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 745 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 746 } 747 } 748 749 if (!pcbddc->coarse_phi_B) { 750 PetscScalar *marray; 751 752 n = n_B*pcbddc->local_primal_size; 753 if (pcbddc->switch_static || pcbddc->dbg_flag) { 754 n += n_D*pcbddc->local_primal_size; 755 } 756 if (!pcbddc->symmetric_primal) { 757 n *= 2; 758 } 759 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 760 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 761 n = n_B*pcbddc->local_primal_size; 762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 764 n += n_D*pcbddc->local_primal_size; 765 } 766 if (!pcbddc->symmetric_primal) { 767 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 768 if (pcbddc->switch_static || pcbddc->dbg_flag) { 769 n = n_B*pcbddc->local_primal_size; 770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 771 } 772 } else { 773 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 774 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 775 if (pcbddc->switch_static || pcbddc->dbg_flag) { 776 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 777 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 778 } 779 } 780 } 781 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 782 /* vertices */ 783 if (n_vertices) { 784 785 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 786 787 if (n_R) { 788 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 789 PetscBLASInt B_N,B_one = 1; 790 PetscScalar *x,*y; 791 PetscBool isseqaij; 792 793 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 794 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 796 if (F) { /* TODO could be optimized for symmetric problems */ 797 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 798 } else { 799 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 800 for (i=0;i<n_vertices;i++) { 801 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 802 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 803 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 804 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 805 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 806 } 807 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 808 } 809 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 810 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 811 if (n_constraints) { 812 Mat B; 813 814 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 815 for (i=0;i<n_vertices;i++) { 816 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 817 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 818 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 819 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 820 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 821 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 822 } 823 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 824 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 825 ierr = MatDestroy(&B);CHKERRQ(ierr); 826 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 827 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 828 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 829 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 830 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 831 ierr = MatDestroy(&B);CHKERRQ(ierr); 832 } 833 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 834 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 835 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 836 } 837 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 838 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 839 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 840 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 841 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 842 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 843 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 844 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 845 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 846 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 847 } else { 848 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 849 } 850 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 851 /* coarse basis functions */ 852 for (i=0;i<n_vertices;i++) { 853 PetscScalar *y; 854 855 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 856 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 857 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 858 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 y[n_B*i+idx_V_B[i]] = 1.0; 861 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 862 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 863 864 if (pcbddc->switch_static || pcbddc->dbg_flag) { 865 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 866 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 867 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 870 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 871 } 872 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 873 } 874 /* if n_R == 0 the object is not destroyed */ 875 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 876 } 877 878 if (n_constraints) { 879 Mat B; 880 881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 882 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 883 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 884 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 885 if (n_vertices) { 886 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 887 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 888 } else { 889 Mat S_VCt; 890 891 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 892 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 893 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 894 } 895 } 896 ierr = MatDestroy(&B);CHKERRQ(ierr); 897 /* coarse basis functions */ 898 for (i=0;i<n_constraints;i++) { 899 PetscScalar *y; 900 901 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 902 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 903 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 904 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 907 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 908 if (pcbddc->switch_static || pcbddc->dbg_flag) { 909 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 910 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 911 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 912 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 914 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 915 } 916 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 917 } 918 } 919 if (n_constraints) { 920 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 921 } 922 923 /* compute other basis functions for non-symmetric problems */ 924 if (!pcbddc->symmetric_primal) { 925 926 if (n_constraints) { 927 Mat S_CCT,B_C; 928 929 /* this is a lazy thing */ 930 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 931 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 932 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 933 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 934 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 935 if (n_vertices) { 936 Mat B_V,S_VCT; 937 938 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 939 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 940 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 941 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 942 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 943 } 944 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 945 } else { /* if there are no constraints, reset work */ 946 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 947 } 948 if (n_vertices && n_R) { 949 Mat A_VRT; 950 PetscScalar *marray; 951 PetscBLASInt B_N,B_one = 1; 952 953 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 954 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 955 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 956 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 957 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 958 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 959 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 960 } 961 962 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 963 for (i=0;i<pcbddc->local_primal_size;i++) { 964 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 965 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 966 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 967 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 968 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 969 } 970 } else { 971 for (i=0;i<pcbddc->local_primal_size;i++) { 972 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 973 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 974 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 975 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 976 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 977 } 978 } 979 /* coarse basis functions */ 980 for (i=0;i<pcbddc->local_primal_size;i++) { 981 PetscScalar *y; 982 983 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 984 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 985 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 986 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 988 if (i<n_vertices) { 989 y[n_B*i+idx_V_B[i]] = 1.0; 990 } 991 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 992 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 993 994 if (pcbddc->switch_static || pcbddc->dbg_flag) { 995 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 996 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 997 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1000 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1001 } 1002 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1003 } 1004 } 1005 /* free memory */ 1006 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1007 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1008 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1009 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1011 ierr = PetscFree(work);CHKERRQ(ierr); 1012 if (n_vertices) { 1013 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1014 } 1015 if (n_constraints) { 1016 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1017 } 1018 /* Checking coarse_sub_mat and coarse basis functios */ 1019 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1020 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1021 if (pcbddc->dbg_flag) { 1022 Mat coarse_sub_mat; 1023 Mat AUXMAT,TM1,TM2,TM3,TM4; 1024 Mat coarse_phi_D,coarse_phi_B; 1025 Mat coarse_psi_D,coarse_psi_B; 1026 Mat A_II,A_BB,A_IB,A_BI; 1027 Mat C_B,CPHI; 1028 IS is_dummy; 1029 Vec mones; 1030 MatType checkmattype=MATSEQAIJ; 1031 PetscReal real_value; 1032 1033 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1034 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1035 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1036 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1037 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1038 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1039 if (unsymmetric_check) { 1040 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1041 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1042 } 1043 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1044 1045 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1046 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1047 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1048 if (unsymmetric_check) { 1049 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1050 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1051 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 } else { 1062 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1063 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1064 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1066 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1067 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1070 } 1071 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1072 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1073 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1074 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1077 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1079 1080 /* check constraints */ 1081 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1082 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1083 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1084 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1085 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1086 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1087 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1089 if (unsymmetric_check) { 1090 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1091 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1092 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1093 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1095 } 1096 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1097 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1098 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1099 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1100 1101 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1102 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1103 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1104 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1105 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1106 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1107 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1108 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1109 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1110 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1111 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1112 if (unsymmetric_check) { 1113 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1114 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1115 } 1116 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1117 } 1118 /* get back data */ 1119 *coarse_submat_vals_n = coarse_submat_vals; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1125 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1126 { 1127 Mat *work_mat; 1128 IS isrow_s,iscol_s; 1129 PetscBool rsorted,csorted; 1130 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1131 PetscErrorCode ierr; 1132 1133 PetscFunctionBegin; 1134 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1135 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1136 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1137 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1138 1139 if (!rsorted) { 1140 const PetscInt *idxs; 1141 PetscInt *idxs_sorted,i; 1142 1143 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1144 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1145 for (i=0;i<rsize;i++) { 1146 idxs_perm_r[i] = i; 1147 } 1148 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1149 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1150 for (i=0;i<rsize;i++) { 1151 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1152 } 1153 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1154 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1155 } else { 1156 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1157 isrow_s = isrow; 1158 } 1159 1160 if (!csorted) { 1161 if (isrow == iscol) { 1162 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1163 iscol_s = isrow_s; 1164 } else { 1165 const PetscInt *idxs; 1166 PetscInt *idxs_sorted,i; 1167 1168 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1169 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1170 for (i=0;i<csize;i++) { 1171 idxs_perm_c[i] = i; 1172 } 1173 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1174 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1175 for (i=0;i<csize;i++) { 1176 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1177 } 1178 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1179 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1180 } 1181 } else { 1182 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1183 iscol_s = iscol; 1184 } 1185 1186 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1187 1188 if (!rsorted || !csorted) { 1189 Mat new_mat; 1190 IS is_perm_r,is_perm_c; 1191 1192 if (!rsorted) { 1193 PetscInt *idxs_r,i; 1194 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1195 for (i=0;i<rsize;i++) { 1196 idxs_r[idxs_perm_r[i]] = i; 1197 } 1198 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1199 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1200 } else { 1201 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1202 } 1203 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1204 1205 if (!csorted) { 1206 if (isrow_s == iscol_s) { 1207 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1208 is_perm_c = is_perm_r; 1209 } else { 1210 PetscInt *idxs_c,i; 1211 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1212 for (i=0;i<csize;i++) { 1213 idxs_c[idxs_perm_c[i]] = i; 1214 } 1215 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1216 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1217 } 1218 } else { 1219 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1220 } 1221 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1222 1223 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1224 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1225 work_mat[0] = new_mat; 1226 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1227 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1228 } 1229 1230 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1231 *B = work_mat[0]; 1232 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1233 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1234 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1240 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1241 { 1242 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1243 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1244 Mat new_mat; 1245 IS is_local,is_global; 1246 PetscInt local_size; 1247 PetscBool isseqaij; 1248 PetscErrorCode ierr; 1249 1250 PetscFunctionBegin; 1251 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1252 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1253 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1254 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1255 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1256 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1257 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1258 1259 /* check */ 1260 if (pcbddc->dbg_flag) { 1261 Vec x,x_change; 1262 PetscReal error; 1263 1264 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1265 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1266 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1267 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1270 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1271 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1272 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1273 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1275 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1276 ierr = VecDestroy(&x);CHKERRQ(ierr); 1277 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1278 } 1279 1280 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1281 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1282 if (isseqaij) { 1283 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1284 } else { 1285 Mat work_mat; 1286 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1287 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1288 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1289 } 1290 if (matis->A->symmetric_set) { 1291 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1292 #if !defined(PETSC_USE_COMPLEX) 1293 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1294 #endif 1295 } 1296 /* 1297 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1298 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1299 */ 1300 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1306 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1307 { 1308 PC_IS* pcis = (PC_IS*)(pc->data); 1309 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1310 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1311 PetscInt *idx_R_local=NULL; 1312 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1313 PetscInt vbs,bs; 1314 PetscBT bitmask=NULL; 1315 PetscErrorCode ierr; 1316 1317 PetscFunctionBegin; 1318 /* 1319 No need to setup local scatters if 1320 - primal space is unchanged 1321 AND 1322 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1323 AND 1324 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1325 */ 1326 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1327 PetscFunctionReturn(0); 1328 } 1329 /* destroy old objects */ 1330 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1331 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1332 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1333 /* Set Non-overlapping dimensions */ 1334 n_B = pcis->n_B; 1335 n_D = pcis->n - n_B; 1336 n_vertices = pcbddc->n_vertices; 1337 1338 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1339 1340 /* create auxiliary bitmask and allocate workspace */ 1341 if (!sub_schurs->reuse_mumps) { 1342 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1343 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1344 for (i=0;i<n_vertices;i++) { 1345 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1346 } 1347 1348 for (i=0, n_R=0; i<pcis->n; i++) { 1349 if (!PetscBTLookup(bitmask,i)) { 1350 idx_R_local[n_R++] = i; 1351 } 1352 } 1353 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1354 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1355 1356 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1357 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1358 } 1359 1360 /* Block code */ 1361 vbs = 1; 1362 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1363 if (bs>1 && !(n_vertices%bs)) { 1364 PetscBool is_blocked = PETSC_TRUE; 1365 PetscInt *vary; 1366 if (!sub_schurs->reuse_mumps) { 1367 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1368 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1369 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1370 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1371 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1372 for (i=0; i<pcis->n/bs; i++) { 1373 if (vary[i]!=0 && vary[i]!=bs) { 1374 is_blocked = PETSC_FALSE; 1375 break; 1376 } 1377 } 1378 ierr = PetscFree(vary);CHKERRQ(ierr); 1379 } else { 1380 /* Verify directly the R set */ 1381 for (i=0; i<n_R/bs; i++) { 1382 PetscInt j,node=idx_R_local[bs*i]; 1383 for (j=1; j<bs; j++) { 1384 if (node != idx_R_local[bs*i+j]-j) { 1385 is_blocked = PETSC_FALSE; 1386 break; 1387 } 1388 } 1389 } 1390 } 1391 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1392 vbs = bs; 1393 for (i=0;i<n_R/vbs;i++) { 1394 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1395 } 1396 } 1397 } 1398 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1399 if (sub_schurs->reuse_mumps) { 1400 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1401 1402 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1403 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1404 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1405 reuse_mumps->is_R = pcbddc->is_R_local; 1406 } else { 1407 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1408 } 1409 1410 /* print some info if requested */ 1411 if (pcbddc->dbg_flag) { 1412 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1414 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1418 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1419 } 1420 1421 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1422 if (!sub_schurs->reuse_mumps) { 1423 IS is_aux1,is_aux2; 1424 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1425 1426 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1427 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1428 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1429 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1430 for (i=0; i<n_D; i++) { 1431 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1432 } 1433 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1434 for (i=0, j=0; i<n_R; i++) { 1435 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1436 aux_array1[j++] = i; 1437 } 1438 } 1439 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1440 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1441 for (i=0, j=0; i<n_B; i++) { 1442 if (!PetscBTLookup(bitmask,is_indices[i])) { 1443 aux_array2[j++] = i; 1444 } 1445 } 1446 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1447 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1448 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1449 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1450 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1451 1452 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1453 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1454 for (i=0, j=0; i<n_R; i++) { 1455 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1456 aux_array1[j++] = i; 1457 } 1458 } 1459 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1460 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1461 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1462 } 1463 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1464 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1465 } else { 1466 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1467 IS tis; 1468 PetscInt schur_size; 1469 1470 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1471 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1472 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1473 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1474 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1475 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1476 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1477 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1478 } 1479 } 1480 PetscFunctionReturn(0); 1481 } 1482 1483 1484 #undef __FUNCT__ 1485 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1486 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1487 { 1488 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1489 PC_IS *pcis = (PC_IS*)pc->data; 1490 PC pc_temp; 1491 Mat A_RR; 1492 MatReuse reuse; 1493 PetscScalar m_one = -1.0; 1494 PetscReal value; 1495 PetscInt n_D,n_R; 1496 PetscBool use_exact,use_exact_reduced,issbaij; 1497 PetscErrorCode ierr; 1498 /* prefixes stuff */ 1499 char dir_prefix[256],neu_prefix[256],str_level[16]; 1500 size_t len; 1501 1502 PetscFunctionBegin; 1503 1504 /* compute prefixes */ 1505 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1506 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1507 if (!pcbddc->current_level) { 1508 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1509 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1510 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1511 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1512 } else { 1513 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1514 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1515 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1516 len -= 15; /* remove "pc_bddc_coarse_" */ 1517 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1518 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1519 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1520 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1521 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1522 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1523 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1524 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1525 } 1526 1527 /* DIRICHLET PROBLEM */ 1528 if (dirichlet) { 1529 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1530 if (pcbddc->local_mat->symmetric_set) { 1531 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1532 } 1533 /* Matrix for Dirichlet problem is pcis->A_II */ 1534 n_D = pcis->n - pcis->n_B; 1535 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1536 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1537 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1538 /* default */ 1539 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1540 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1541 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1542 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1543 if (issbaij) { 1544 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1545 } else { 1546 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1547 } 1548 /* Allow user's customization */ 1549 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1550 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1551 } 1552 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1553 if (sub_schurs->reuse_mumps) { 1554 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1555 1556 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1557 } 1558 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1559 if (!n_D) { 1560 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1561 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1562 } 1563 /* Set Up KSP for Dirichlet problem of BDDC */ 1564 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1565 /* set ksp_D into pcis data */ 1566 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1567 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1568 pcis->ksp_D = pcbddc->ksp_D; 1569 } 1570 1571 /* NEUMANN PROBLEM */ 1572 A_RR = 0; 1573 if (neumann) { 1574 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1575 PetscInt ibs,mbs; 1576 PetscBool issbaij; 1577 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1578 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1579 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1580 if (pcbddc->ksp_R) { /* already created ksp */ 1581 PetscInt nn_R; 1582 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1583 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1584 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1585 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1586 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1587 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1588 reuse = MAT_INITIAL_MATRIX; 1589 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1590 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1591 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1592 reuse = MAT_INITIAL_MATRIX; 1593 } else { /* safe to reuse the matrix */ 1594 reuse = MAT_REUSE_MATRIX; 1595 } 1596 } 1597 /* last check */ 1598 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1599 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1600 reuse = MAT_INITIAL_MATRIX; 1601 } 1602 } else { /* first time, so we need to create the matrix */ 1603 reuse = MAT_INITIAL_MATRIX; 1604 } 1605 /* extract A_RR */ 1606 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1607 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1608 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1609 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1610 if (matis->A == pcbddc->local_mat) { 1611 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1612 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1613 } else { 1614 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1615 } 1616 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1617 if (matis->A == pcbddc->local_mat) { 1618 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1619 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1620 } else { 1621 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1622 } 1623 } 1624 if (!sub_schurs->reuse_mumps) { 1625 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1626 if (pcbddc->local_mat->symmetric_set) { 1627 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1628 } 1629 } else { 1630 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1631 1632 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1633 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1634 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1635 } 1636 if (!pcbddc->ksp_R) { /* create object if not present */ 1637 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1638 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1639 /* default */ 1640 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1641 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1642 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1643 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1644 if (issbaij) { 1645 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1646 } else { 1647 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1648 } 1649 /* Allow user's customization */ 1650 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1651 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1652 } 1653 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1654 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1655 if (!n_R) { 1656 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1657 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1658 } 1659 /* Reuse MUMPS solver if it is present */ 1660 if (sub_schurs->reuse_mumps) { 1661 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1662 1663 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1664 } 1665 /* Set Up KSP for Neumann problem of BDDC */ 1666 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1667 } 1668 /* free Neumann problem's matrix */ 1669 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1670 1671 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1672 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1673 if (pcbddc->dbg_flag) { 1674 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1675 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1676 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1677 } 1678 if (dirichlet) { /* Dirichlet */ 1679 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1680 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1681 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1682 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1683 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1684 /* need to be adapted? */ 1685 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1686 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1687 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1688 /* print info */ 1689 if (pcbddc->dbg_flag) { 1690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1691 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1692 } 1693 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1694 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1695 } 1696 } 1697 if (neumann) { /* Neumann */ 1698 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1699 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1700 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1701 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1702 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1703 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1704 /* need to be adapted? */ 1705 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1706 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1707 /* print info */ 1708 if (pcbddc->dbg_flag) { 1709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1710 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1711 } 1712 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1713 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1714 } 1715 } 1716 } 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1722 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1723 { 1724 PetscErrorCode ierr; 1725 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1726 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1727 1728 PetscFunctionBegin; 1729 if (!sub_schurs->reuse_mumps) { 1730 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1731 } 1732 if (!pcbddc->switch_static) { 1733 if (applytranspose && pcbddc->local_auxmat1) { 1734 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1735 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1736 } 1737 if (!sub_schurs->reuse_mumps) { 1738 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1739 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1740 } else { 1741 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1742 1743 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1744 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1745 } 1746 } else { 1747 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1748 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1749 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1750 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1751 if (applytranspose && pcbddc->local_auxmat1) { 1752 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1753 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1754 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1755 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1756 } 1757 } 1758 if (!sub_schurs->reuse_mumps) { 1759 if (applytranspose) { 1760 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1761 } else { 1762 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1763 } 1764 #if defined(PETSC_HAVE_MUMPS) 1765 } else { 1766 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1767 1768 if (applytranspose) { 1769 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1770 } else { 1771 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1772 } 1773 #endif 1774 } 1775 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1776 if (!pcbddc->switch_static) { 1777 if (!sub_schurs->reuse_mumps) { 1778 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 } else { 1781 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1782 1783 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1784 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1785 } 1786 if (!applytranspose && pcbddc->local_auxmat1) { 1787 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1788 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1789 } 1790 } else { 1791 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1792 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1795 if (!applytranspose && pcbddc->local_auxmat1) { 1796 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1797 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1798 } 1799 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1800 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1802 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1803 } 1804 PetscFunctionReturn(0); 1805 } 1806 1807 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1810 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1811 { 1812 PetscErrorCode ierr; 1813 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1814 PC_IS* pcis = (PC_IS*) (pc->data); 1815 const PetscScalar zero = 0.0; 1816 1817 PetscFunctionBegin; 1818 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1819 if (applytranspose) { 1820 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1821 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1822 } else { 1823 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1824 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1825 } 1826 /* start communications from local primal nodes to rhs of coarse solver */ 1827 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1828 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1829 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1830 1831 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1832 /* TODO remove null space when doing multilevel */ 1833 if (pcbddc->coarse_ksp) { 1834 Vec rhs,sol; 1835 1836 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1837 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1838 if (applytranspose) { 1839 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1840 } else { 1841 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1842 } 1843 } 1844 1845 /* Local solution on R nodes */ 1846 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1847 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1848 } 1849 1850 /* communications from coarse sol to local primal nodes */ 1851 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1852 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1853 1854 /* Sum contributions from two levels */ 1855 if (applytranspose) { 1856 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1857 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1858 } else { 1859 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1860 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1861 } 1862 PetscFunctionReturn(0); 1863 } 1864 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1867 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1868 { 1869 PetscErrorCode ierr; 1870 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1871 PetscScalar *array; 1872 Vec from,to; 1873 1874 PetscFunctionBegin; 1875 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1876 from = pcbddc->coarse_vec; 1877 to = pcbddc->vec1_P; 1878 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1879 Vec tvec; 1880 1881 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1882 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1883 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1884 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1885 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1886 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1887 } 1888 } else { /* from local to global -> put data in coarse right hand side */ 1889 from = pcbddc->vec1_P; 1890 to = pcbddc->coarse_vec; 1891 } 1892 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1898 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1899 { 1900 PetscErrorCode ierr; 1901 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1902 PetscScalar *array; 1903 Vec from,to; 1904 1905 PetscFunctionBegin; 1906 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1907 from = pcbddc->coarse_vec; 1908 to = pcbddc->vec1_P; 1909 } else { /* from local to global -> put data in coarse right hand side */ 1910 from = pcbddc->vec1_P; 1911 to = pcbddc->coarse_vec; 1912 } 1913 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1914 if (smode == SCATTER_FORWARD) { 1915 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1916 Vec tvec; 1917 1918 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1919 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1920 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1921 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1922 } 1923 } else { 1924 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1925 ierr = VecResetArray(from);CHKERRQ(ierr); 1926 } 1927 } 1928 PetscFunctionReturn(0); 1929 } 1930 1931 /* uncomment for testing purposes */ 1932 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1935 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1936 { 1937 PetscErrorCode ierr; 1938 PC_IS* pcis = (PC_IS*)(pc->data); 1939 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1940 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1941 /* one and zero */ 1942 PetscScalar one=1.0,zero=0.0; 1943 /* space to store constraints and their local indices */ 1944 PetscScalar *constraints_data; 1945 PetscInt *constraints_idxs,*constraints_idxs_B; 1946 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1947 PetscInt *constraints_n; 1948 /* iterators */ 1949 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1950 /* BLAS integers */ 1951 PetscBLASInt lwork,lierr; 1952 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1953 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1954 /* reuse */ 1955 PetscInt olocal_primal_size,olocal_primal_size_cc; 1956 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1957 /* change of basis */ 1958 PetscBool qr_needed; 1959 PetscBT change_basis,qr_needed_idx; 1960 /* auxiliary stuff */ 1961 PetscInt *nnz,*is_indices; 1962 PetscInt ncc; 1963 /* some quantities */ 1964 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1965 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1966 1967 PetscFunctionBegin; 1968 /* Destroy Mat objects computed previously */ 1969 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1970 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1971 /* save info on constraints from previous setup (if any) */ 1972 olocal_primal_size = pcbddc->local_primal_size; 1973 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1974 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1975 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1976 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1977 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1978 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1979 1980 /* print some info */ 1981 if (pcbddc->dbg_flag) { 1982 IS vertices; 1983 PetscInt nv,nedges,nfaces; 1984 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1985 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1986 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1987 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1993 } 1994 1995 if (!pcbddc->adaptive_selection) { 1996 IS ISForVertices,*ISForFaces,*ISForEdges; 1997 MatNullSpace nearnullsp; 1998 const Vec *nearnullvecs; 1999 Vec *localnearnullsp; 2000 PetscScalar *array; 2001 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2002 PetscBool nnsp_has_cnst; 2003 /* LAPACK working arrays for SVD or POD */ 2004 PetscBool skip_lapack,boolforchange; 2005 PetscScalar *work; 2006 PetscReal *singular_vals; 2007 #if defined(PETSC_USE_COMPLEX) 2008 PetscReal *rwork; 2009 #endif 2010 #if defined(PETSC_MISSING_LAPACK_GESVD) 2011 PetscScalar *temp_basis,*correlation_mat; 2012 #else 2013 PetscBLASInt dummy_int=1; 2014 PetscScalar dummy_scalar=1.; 2015 #endif 2016 2017 /* Get index sets for faces, edges and vertices from graph */ 2018 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2019 /* free unneeded index sets */ 2020 if (!pcbddc->use_vertices) { 2021 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2022 } 2023 if (!pcbddc->use_edges) { 2024 for (i=0;i<n_ISForEdges;i++) { 2025 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2026 } 2027 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2028 n_ISForEdges = 0; 2029 } 2030 if (!pcbddc->use_faces) { 2031 for (i=0;i<n_ISForFaces;i++) { 2032 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2033 } 2034 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2035 n_ISForFaces = 0; 2036 } 2037 2038 #if defined(PETSC_USE_DEBUG) 2039 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2040 Also use_change_of_basis should be consistent among processors */ 2041 if (pcbddc->NullSpace) { 2042 PetscBool tbool[2],gbool[2]; 2043 2044 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2045 pcbddc->use_change_of_basis = PETSC_TRUE; 2046 if (!ISForEdges) { 2047 pcbddc->use_change_on_faces = PETSC_TRUE; 2048 } 2049 } 2050 tbool[0] = pcbddc->use_change_of_basis; 2051 tbool[1] = pcbddc->use_change_on_faces; 2052 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2053 pcbddc->use_change_of_basis = gbool[0]; 2054 pcbddc->use_change_on_faces = gbool[1]; 2055 } 2056 #endif 2057 2058 /* check if near null space is attached to global mat */ 2059 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2060 if (nearnullsp) { 2061 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2062 /* remove any stored info */ 2063 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2064 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2065 /* store information for BDDC solver reuse */ 2066 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2067 pcbddc->onearnullspace = nearnullsp; 2068 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2069 for (i=0;i<nnsp_size;i++) { 2070 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2071 } 2072 } else { /* if near null space is not provided BDDC uses constants by default */ 2073 nnsp_size = 0; 2074 nnsp_has_cnst = PETSC_TRUE; 2075 } 2076 /* get max number of constraints on a single cc */ 2077 max_constraints = nnsp_size; 2078 if (nnsp_has_cnst) max_constraints++; 2079 2080 /* 2081 Evaluate maximum storage size needed by the procedure 2082 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2083 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2084 There can be multiple constraints per connected component 2085 */ 2086 n_vertices = 0; 2087 if (ISForVertices) { 2088 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2089 } 2090 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2091 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2092 2093 total_counts = n_ISForFaces+n_ISForEdges; 2094 total_counts *= max_constraints; 2095 total_counts += n_vertices; 2096 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2097 2098 total_counts = 0; 2099 max_size_of_constraint = 0; 2100 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2101 IS used_is; 2102 if (i<n_ISForEdges) { 2103 used_is = ISForEdges[i]; 2104 } else { 2105 used_is = ISForFaces[i-n_ISForEdges]; 2106 } 2107 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2108 total_counts += j; 2109 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2110 } 2111 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2112 2113 /* get local part of global near null space vectors */ 2114 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2115 for (k=0;k<nnsp_size;k++) { 2116 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2117 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2119 } 2120 2121 /* whether or not to skip lapack calls */ 2122 skip_lapack = PETSC_TRUE; 2123 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2124 2125 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2126 if (!skip_lapack) { 2127 PetscScalar temp_work; 2128 2129 #if defined(PETSC_MISSING_LAPACK_GESVD) 2130 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2131 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2132 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2133 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2134 #if defined(PETSC_USE_COMPLEX) 2135 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2136 #endif 2137 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2138 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2139 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2140 lwork = -1; 2141 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2142 #if !defined(PETSC_USE_COMPLEX) 2143 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2144 #else 2145 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2146 #endif 2147 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2148 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2149 #else /* on missing GESVD */ 2150 /* SVD */ 2151 PetscInt max_n,min_n; 2152 max_n = max_size_of_constraint; 2153 min_n = max_constraints; 2154 if (max_size_of_constraint < max_constraints) { 2155 min_n = max_size_of_constraint; 2156 max_n = max_constraints; 2157 } 2158 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2159 #if defined(PETSC_USE_COMPLEX) 2160 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2161 #endif 2162 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2163 lwork = -1; 2164 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2165 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2166 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2167 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2168 #if !defined(PETSC_USE_COMPLEX) 2169 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2170 #else 2171 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2172 #endif 2173 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2174 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2175 #endif /* on missing GESVD */ 2176 /* Allocate optimal workspace */ 2177 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2178 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2179 } 2180 /* Now we can loop on constraining sets */ 2181 total_counts = 0; 2182 constraints_idxs_ptr[0] = 0; 2183 constraints_data_ptr[0] = 0; 2184 /* vertices */ 2185 if (n_vertices) { 2186 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2187 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2188 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2189 for (i=0;i<n_vertices;i++) { 2190 constraints_n[total_counts] = 1; 2191 constraints_data[total_counts] = 1.0; 2192 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2193 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2194 total_counts++; 2195 } 2196 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2197 PetscBool used_vertex; 2198 for (i=0;i<n_vertices;i++) { 2199 used_vertex = PETSC_FALSE; 2200 k = 0; 2201 while (!used_vertex && k<nnsp_size) { 2202 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2203 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2204 constraints_n[total_counts] = 1; 2205 constraints_idxs[total_counts] = is_indices[i]; 2206 constraints_data[total_counts] = 1.0; 2207 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2208 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2209 total_counts++; 2210 used_vertex = PETSC_TRUE; 2211 } 2212 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2213 k++; 2214 } 2215 } 2216 } 2217 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2218 n_vertices = total_counts; 2219 } 2220 2221 /* edges and faces */ 2222 total_counts_cc = total_counts; 2223 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2224 IS used_is; 2225 PetscBool idxs_copied = PETSC_FALSE; 2226 2227 if (ncc<n_ISForEdges) { 2228 used_is = ISForEdges[ncc]; 2229 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2230 } else { 2231 used_is = ISForFaces[ncc-n_ISForEdges]; 2232 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2233 } 2234 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2235 2236 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2237 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2238 /* change of basis should not be performed on local periodic nodes */ 2239 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2240 if (nnsp_has_cnst) { 2241 PetscScalar quad_value; 2242 2243 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2244 idxs_copied = PETSC_TRUE; 2245 2246 if (!pcbddc->use_nnsp_true) { 2247 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2248 } else { 2249 quad_value = 1.0; 2250 } 2251 for (j=0;j<size_of_constraint;j++) { 2252 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2253 } 2254 temp_constraints++; 2255 total_counts++; 2256 } 2257 for (k=0;k<nnsp_size;k++) { 2258 PetscReal real_value; 2259 PetscScalar *ptr_to_data; 2260 2261 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2262 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2263 for (j=0;j<size_of_constraint;j++) { 2264 ptr_to_data[j] = array[is_indices[j]]; 2265 } 2266 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2267 /* check if array is null on the connected component */ 2268 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2269 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2270 if (real_value > 0.0) { /* keep indices and values */ 2271 temp_constraints++; 2272 total_counts++; 2273 if (!idxs_copied) { 2274 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2275 idxs_copied = PETSC_TRUE; 2276 } 2277 } 2278 } 2279 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2280 valid_constraints = temp_constraints; 2281 if (!pcbddc->use_nnsp_true && temp_constraints) { 2282 if (temp_constraints == 1) { /* just normalize the constraint */ 2283 PetscScalar norm,*ptr_to_data; 2284 2285 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2286 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2287 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2288 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2289 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2290 } else { /* perform SVD */ 2291 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2292 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2293 2294 #if defined(PETSC_MISSING_LAPACK_GESVD) 2295 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2296 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2297 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2298 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2299 from that computed using LAPACKgesvd 2300 -> This is due to a different computation of eigenvectors in LAPACKheev 2301 -> The quality of the POD-computed basis will be the same */ 2302 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2303 /* Store upper triangular part of correlation matrix */ 2304 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2305 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2306 for (j=0;j<temp_constraints;j++) { 2307 for (k=0;k<j+1;k++) { 2308 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2309 } 2310 } 2311 /* compute eigenvalues and eigenvectors of correlation matrix */ 2312 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2313 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2314 #if !defined(PETSC_USE_COMPLEX) 2315 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2316 #else 2317 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2318 #endif 2319 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2320 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2321 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2322 j = 0; 2323 while (j < temp_constraints && singular_vals[j] < tol) j++; 2324 total_counts = total_counts-j; 2325 valid_constraints = temp_constraints-j; 2326 /* scale and copy POD basis into used quadrature memory */ 2327 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2331 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2332 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2333 if (j<temp_constraints) { 2334 PetscInt ii; 2335 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2336 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2337 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2338 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2339 for (k=0;k<temp_constraints-j;k++) { 2340 for (ii=0;ii<size_of_constraint;ii++) { 2341 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2342 } 2343 } 2344 } 2345 #else /* on missing GESVD */ 2346 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2347 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2348 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2349 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2350 #if !defined(PETSC_USE_COMPLEX) 2351 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2352 #else 2353 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2354 #endif 2355 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2356 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2357 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2358 k = temp_constraints; 2359 if (k > size_of_constraint) k = size_of_constraint; 2360 j = 0; 2361 while (j < k && singular_vals[k-j-1] < tol) j++; 2362 valid_constraints = k-j; 2363 total_counts = total_counts-temp_constraints+valid_constraints; 2364 #endif /* on missing GESVD */ 2365 } 2366 } 2367 /* update pointers information */ 2368 if (valid_constraints) { 2369 constraints_n[total_counts_cc] = valid_constraints; 2370 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2371 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2372 /* set change_of_basis flag */ 2373 if (boolforchange) { 2374 PetscBTSet(change_basis,total_counts_cc); 2375 } 2376 total_counts_cc++; 2377 } 2378 } 2379 /* free workspace */ 2380 if (!skip_lapack) { 2381 ierr = PetscFree(work);CHKERRQ(ierr); 2382 #if defined(PETSC_USE_COMPLEX) 2383 ierr = PetscFree(rwork);CHKERRQ(ierr); 2384 #endif 2385 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2386 #if defined(PETSC_MISSING_LAPACK_GESVD) 2387 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2388 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2389 #endif 2390 } 2391 for (k=0;k<nnsp_size;k++) { 2392 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2393 } 2394 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2395 /* free index sets of faces, edges and vertices */ 2396 for (i=0;i<n_ISForFaces;i++) { 2397 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2398 } 2399 if (n_ISForFaces) { 2400 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2401 } 2402 for (i=0;i<n_ISForEdges;i++) { 2403 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2404 } 2405 if (n_ISForEdges) { 2406 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2407 } 2408 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2409 } else { 2410 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2411 2412 total_counts = 0; 2413 n_vertices = 0; 2414 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2415 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2416 } 2417 max_constraints = 0; 2418 total_counts_cc = 0; 2419 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2420 total_counts += pcbddc->adaptive_constraints_n[i]; 2421 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2422 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2423 } 2424 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2425 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2426 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2427 constraints_data = pcbddc->adaptive_constraints_data; 2428 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2429 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2430 total_counts_cc = 0; 2431 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2432 if (pcbddc->adaptive_constraints_n[i]) { 2433 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2434 } 2435 } 2436 #if 0 2437 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2438 for (i=0;i<total_counts_cc;i++) { 2439 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2440 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2441 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2442 printf(" %d",constraints_idxs[j]); 2443 } 2444 printf("\n"); 2445 printf("number of cc: %d\n",constraints_n[i]); 2446 } 2447 for (i=0;i<n_vertices;i++) { 2448 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2449 } 2450 for (i=0;i<sub_schurs->n_subs;i++) { 2451 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2452 } 2453 #endif 2454 2455 max_size_of_constraint = 0; 2456 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2457 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2458 /* Change of basis */ 2459 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2460 if (pcbddc->use_change_of_basis) { 2461 for (i=0;i<sub_schurs->n_subs;i++) { 2462 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2463 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2464 } 2465 } 2466 } 2467 } 2468 pcbddc->local_primal_size = total_counts; 2469 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2470 2471 /* map constraints_idxs in boundary numbering */ 2472 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2473 if (i != constraints_idxs_ptr[total_counts_cc]) { 2474 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2475 } 2476 2477 /* Create constraint matrix */ 2478 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2479 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2480 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2481 2482 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2483 /* determine if a QR strategy is needed for change of basis */ 2484 qr_needed = PETSC_FALSE; 2485 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2486 total_primal_vertices=0; 2487 pcbddc->local_primal_size_cc = 0; 2488 for (i=0;i<total_counts_cc;i++) { 2489 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2490 if (size_of_constraint == 1) { 2491 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2492 pcbddc->local_primal_size_cc += 1; 2493 } else if (PetscBTLookup(change_basis,i)) { 2494 for (k=0;k<constraints_n[i];k++) { 2495 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2496 } 2497 pcbddc->local_primal_size_cc += constraints_n[i]; 2498 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2499 PetscBTSet(qr_needed_idx,i); 2500 qr_needed = PETSC_TRUE; 2501 } 2502 } else { 2503 pcbddc->local_primal_size_cc += 1; 2504 } 2505 } 2506 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2507 pcbddc->n_vertices = total_primal_vertices; 2508 /* permute indices in order to have a sorted set of vertices */ 2509 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2510 2511 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2512 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2513 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2514 2515 /* nonzero structure of constraint matrix */ 2516 /* and get reference dof for local constraints */ 2517 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2518 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2519 2520 j = total_primal_vertices; 2521 total_counts = total_primal_vertices; 2522 cum = total_primal_vertices; 2523 for (i=n_vertices;i<total_counts_cc;i++) { 2524 if (!PetscBTLookup(change_basis,i)) { 2525 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2526 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2527 cum++; 2528 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2529 for (k=0;k<constraints_n[i];k++) { 2530 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2531 nnz[j+k] = size_of_constraint; 2532 } 2533 j += constraints_n[i]; 2534 } 2535 } 2536 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2537 ierr = PetscFree(nnz);CHKERRQ(ierr); 2538 2539 /* set values in constraint matrix */ 2540 for (i=0;i<total_primal_vertices;i++) { 2541 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2542 } 2543 total_counts = total_primal_vertices; 2544 for (i=n_vertices;i<total_counts_cc;i++) { 2545 if (!PetscBTLookup(change_basis,i)) { 2546 PetscInt *cols; 2547 2548 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2549 cols = constraints_idxs+constraints_idxs_ptr[i]; 2550 for (k=0;k<constraints_n[i];k++) { 2551 PetscInt row = total_counts+k; 2552 PetscScalar *vals; 2553 2554 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2555 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2556 } 2557 total_counts += constraints_n[i]; 2558 } 2559 } 2560 /* assembling */ 2561 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2562 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2563 2564 /* 2565 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2566 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2567 */ 2568 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2569 if (pcbddc->use_change_of_basis) { 2570 /* dual and primal dofs on a single cc */ 2571 PetscInt dual_dofs,primal_dofs; 2572 /* working stuff for GEQRF */ 2573 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2574 PetscBLASInt lqr_work; 2575 /* working stuff for UNGQR */ 2576 PetscScalar *gqr_work,lgqr_work_t; 2577 PetscBLASInt lgqr_work; 2578 /* working stuff for TRTRS */ 2579 PetscScalar *trs_rhs; 2580 PetscBLASInt Blas_NRHS; 2581 /* pointers for values insertion into change of basis matrix */ 2582 PetscInt *start_rows,*start_cols; 2583 PetscScalar *start_vals; 2584 /* working stuff for values insertion */ 2585 PetscBT is_primal; 2586 PetscInt *aux_primal_numbering_B; 2587 /* matrix sizes */ 2588 PetscInt global_size,local_size; 2589 /* temporary change of basis */ 2590 Mat localChangeOfBasisMatrix; 2591 /* extra space for debugging */ 2592 PetscScalar *dbg_work; 2593 2594 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2595 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2596 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2597 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2598 /* nonzeros for local mat */ 2599 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2600 for (i=0;i<pcis->n;i++) nnz[i]=1; 2601 for (i=n_vertices;i<total_counts_cc;i++) { 2602 if (PetscBTLookup(change_basis,i)) { 2603 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2604 if (PetscBTLookup(qr_needed_idx,i)) { 2605 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2606 } else { 2607 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2608 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2609 } 2610 } 2611 } 2612 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2613 ierr = PetscFree(nnz);CHKERRQ(ierr); 2614 /* Set initial identity in the matrix */ 2615 for (i=0;i<pcis->n;i++) { 2616 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2617 } 2618 2619 if (pcbddc->dbg_flag) { 2620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2622 } 2623 2624 2625 /* Now we loop on the constraints which need a change of basis */ 2626 /* 2627 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2628 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2629 2630 Basic blocks of change of basis matrix T computed by 2631 2632 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2633 2634 | 1 0 ... 0 s_1/S | 2635 | 0 1 ... 0 s_2/S | 2636 | ... | 2637 | 0 ... 1 s_{n-1}/S | 2638 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2639 2640 with S = \sum_{i=1}^n s_i^2 2641 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2642 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2643 2644 - QR decomposition of constraints otherwise 2645 */ 2646 if (qr_needed) { 2647 /* space to store Q */ 2648 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2649 /* first we issue queries for optimal work */ 2650 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2651 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2652 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2653 lqr_work = -1; 2654 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2655 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2656 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2657 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2658 lgqr_work = -1; 2659 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2660 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2661 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2662 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2663 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2664 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2665 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2666 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2667 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2668 /* array to store scaling factors for reflectors */ 2669 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2670 /* array to store rhs and solution of triangular solver */ 2671 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2672 /* allocating workspace for check */ 2673 if (pcbddc->dbg_flag) { 2674 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2675 } 2676 } 2677 /* array to store whether a node is primal or not */ 2678 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2679 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2680 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2681 if (i != total_primal_vertices) { 2682 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2683 } 2684 for (i=0;i<total_primal_vertices;i++) { 2685 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2686 } 2687 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2688 2689 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2690 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2691 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2692 if (PetscBTLookup(change_basis,total_counts)) { 2693 /* get constraint info */ 2694 primal_dofs = constraints_n[total_counts]; 2695 dual_dofs = size_of_constraint-primal_dofs; 2696 2697 if (pcbddc->dbg_flag) { 2698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2699 } 2700 2701 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2702 2703 /* copy quadrature constraints for change of basis check */ 2704 if (pcbddc->dbg_flag) { 2705 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2706 } 2707 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2708 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2709 2710 /* compute QR decomposition of constraints */ 2711 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2712 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2713 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2714 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2715 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2716 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2717 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2718 2719 /* explictly compute R^-T */ 2720 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2721 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2722 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2726 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2727 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2728 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2729 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2730 2731 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2733 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2734 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2735 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2736 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2737 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2738 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2739 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2740 2741 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2742 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2743 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2744 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2745 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2747 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2748 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2749 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2750 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2751 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2752 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2753 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2754 2755 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2756 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2757 /* insert cols for primal dofs */ 2758 for (j=0;j<primal_dofs;j++) { 2759 start_vals = &qr_basis[j*size_of_constraint]; 2760 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2761 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2762 } 2763 /* insert cols for dual dofs */ 2764 for (j=0,k=0;j<dual_dofs;k++) { 2765 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2766 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2767 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2768 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2769 j++; 2770 } 2771 } 2772 2773 /* check change of basis */ 2774 if (pcbddc->dbg_flag) { 2775 PetscInt ii,jj; 2776 PetscBool valid_qr=PETSC_TRUE; 2777 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2778 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2782 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2783 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2784 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2785 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2786 for (jj=0;jj<size_of_constraint;jj++) { 2787 for (ii=0;ii<primal_dofs;ii++) { 2788 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2789 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2790 } 2791 } 2792 if (!valid_qr) { 2793 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2794 for (jj=0;jj<size_of_constraint;jj++) { 2795 for (ii=0;ii<primal_dofs;ii++) { 2796 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2797 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2798 } 2799 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2800 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2801 } 2802 } 2803 } 2804 } else { 2805 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2806 } 2807 } 2808 } else { /* simple transformation block */ 2809 PetscInt row,col; 2810 PetscScalar val,norm; 2811 2812 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2813 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2814 for (j=0;j<size_of_constraint;j++) { 2815 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2816 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2817 if (!PetscBTLookup(is_primal,row_B)) { 2818 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2819 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2820 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2821 } else { 2822 for (k=0;k<size_of_constraint;k++) { 2823 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2824 if (row != col) { 2825 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2826 } else { 2827 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2828 } 2829 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2830 } 2831 } 2832 } 2833 if (pcbddc->dbg_flag) { 2834 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2835 } 2836 } 2837 } else { 2838 if (pcbddc->dbg_flag) { 2839 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2840 } 2841 } 2842 } 2843 2844 /* free workspace */ 2845 if (qr_needed) { 2846 if (pcbddc->dbg_flag) { 2847 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2848 } 2849 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2850 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2851 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2852 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2853 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2854 } 2855 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2856 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2857 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2858 2859 /* assembling of global change of variable */ 2860 { 2861 Mat tmat; 2862 PetscInt bs; 2863 2864 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2865 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2866 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2867 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2868 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2869 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2870 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2871 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2872 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2873 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2874 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2875 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2876 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2877 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2878 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2879 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2880 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2881 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2882 } 2883 /* check */ 2884 if (pcbddc->dbg_flag) { 2885 PetscReal error; 2886 Vec x,x_change; 2887 2888 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2889 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2890 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2891 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2892 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2893 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2894 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2895 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2896 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2897 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2898 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2899 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2900 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2901 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2902 ierr = VecDestroy(&x);CHKERRQ(ierr); 2903 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2904 } 2905 2906 /* adapt sub_schurs computed (if any) */ 2907 if (pcbddc->use_deluxe_scaling) { 2908 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2909 if (sub_schurs->S_Ej_all) { 2910 Mat S_new,tmat; 2911 IS is_all_N; 2912 2913 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2914 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2915 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2916 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2917 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2918 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2919 sub_schurs->S_Ej_all = S_new; 2920 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2921 if (sub_schurs->sum_S_Ej_all) { 2922 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2923 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2924 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2925 sub_schurs->sum_S_Ej_all = S_new; 2926 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2927 } 2928 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2929 } 2930 } 2931 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2932 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2933 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2934 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2935 } 2936 2937 /* set up change of basis context */ 2938 if (pcbddc->ChangeOfBasisMatrix) { 2939 PCBDDCChange_ctx change_ctx; 2940 2941 if (!pcbddc->new_global_mat) { 2942 PetscInt global_size,local_size; 2943 2944 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2945 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2946 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2947 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2948 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2949 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2950 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2951 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2952 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2953 } else { 2954 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2955 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2956 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2957 } 2958 if (!pcbddc->user_ChangeOfBasisMatrix) { 2959 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2960 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2961 } else { 2962 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2963 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2964 } 2965 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2966 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2967 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2968 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2969 } 2970 2971 /* check if a new primal space has been introduced */ 2972 pcbddc->new_primal_space_local = PETSC_TRUE; 2973 if (olocal_primal_size == pcbddc->local_primal_size) { 2974 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2975 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2976 if (!pcbddc->new_primal_space_local) { 2977 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2978 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2979 } 2980 } 2981 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2982 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2983 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2984 2985 /* flush dbg viewer */ 2986 if (pcbddc->dbg_flag) { 2987 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2988 } 2989 2990 /* free workspace */ 2991 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2992 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2993 if (!pcbddc->adaptive_selection) { 2994 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2995 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2996 } else { 2997 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2998 pcbddc->adaptive_constraints_idxs_ptr, 2999 pcbddc->adaptive_constraints_data_ptr, 3000 pcbddc->adaptive_constraints_idxs, 3001 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3002 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3003 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3004 } 3005 PetscFunctionReturn(0); 3006 } 3007 3008 #undef __FUNCT__ 3009 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3010 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3011 { 3012 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3013 PC_IS *pcis = (PC_IS*)pc->data; 3014 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3015 PetscInt ierr,i,vertex_size,N; 3016 PetscViewer viewer=pcbddc->dbg_viewer; 3017 3018 PetscFunctionBegin; 3019 /* Reset previously computed graph */ 3020 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3021 /* Init local Graph struct */ 3022 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3023 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3024 3025 /* Check validity of the csr graph passed in by the user */ 3026 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3027 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3028 } 3029 3030 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3031 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3032 PetscInt *xadj,*adjncy; 3033 PetscInt nvtxs; 3034 PetscBool flg_row=PETSC_FALSE; 3035 3036 if (pcbddc->use_local_adj) { 3037 3038 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3039 if (flg_row) { 3040 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3041 pcbddc->computed_rowadj = PETSC_TRUE; 3042 } 3043 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3044 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3045 IS is_dummy; 3046 ISLocalToGlobalMapping l2gmap_dummy; 3047 PetscInt j,sum; 3048 PetscInt *cxadj,*cadjncy; 3049 const PetscInt *idxs; 3050 PCBDDCGraph graph; 3051 PetscBT is_on_boundary; 3052 3053 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3054 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3055 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3056 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3057 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3058 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3059 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3060 if (flg_row) { 3061 graph->xadj = xadj; 3062 graph->adjncy = adjncy; 3063 } 3064 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3065 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3066 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3067 3068 if (pcbddc->dbg_flag) { 3069 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3070 for (i=0;i<graph->ncc;i++) { 3071 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3072 } 3073 } 3074 3075 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3076 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3077 for (i=0;i<pcis->n_B;i++) { 3078 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3079 } 3080 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3081 3082 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3083 sum = 0; 3084 for (i=0;i<graph->ncc;i++) { 3085 PetscInt sizecc = 0; 3086 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3087 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3088 sizecc++; 3089 } 3090 } 3091 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3092 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3093 cxadj[graph->queue[j]] = sizecc; 3094 } 3095 } 3096 sum += sizecc*sizecc; 3097 } 3098 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3099 sum = 0; 3100 for (i=0;i<pcis->n;i++) { 3101 PetscInt temp = cxadj[i]; 3102 cxadj[i] = sum; 3103 sum += temp; 3104 } 3105 cxadj[pcis->n] = sum; 3106 for (i=0;i<graph->ncc;i++) { 3107 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3108 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3109 PetscInt k,sizecc = 0; 3110 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3111 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3112 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3113 sizecc++; 3114 } 3115 } 3116 } 3117 } 3118 } 3119 if (sum) { 3120 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3121 } else { 3122 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3123 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3124 } 3125 graph->xadj = 0; 3126 graph->adjncy = 0; 3127 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3128 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3129 } 3130 } 3131 if (pcbddc->dbg_flag) { 3132 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3133 } 3134 3135 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3136 vertex_size = 1; 3137 if (pcbddc->user_provided_isfordofs) { 3138 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3139 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3140 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3141 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3142 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3143 } 3144 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3145 pcbddc->n_ISForDofs = 0; 3146 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3147 } 3148 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3149 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3150 } else { 3151 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3152 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3153 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3154 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3155 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3156 } 3157 } 3158 } 3159 3160 /* Setup of Graph */ 3161 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3162 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3163 } 3164 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3165 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3166 } 3167 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3168 3169 /* Graph's connected components analysis */ 3170 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3171 3172 /* print some info to stdout */ 3173 if (pcbddc->dbg_flag) { 3174 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3175 } 3176 3177 /* mark topography has done */ 3178 pcbddc->recompute_topography = PETSC_FALSE; 3179 PetscFunctionReturn(0); 3180 } 3181 3182 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3183 #undef __FUNCT__ 3184 #define __FUNCT__ "PCBDDCSubsetNumbering" 3185 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3186 { 3187 PetscSF sf; 3188 PetscLayout map; 3189 const PetscInt *idxs; 3190 PetscInt *leaf_data,*root_data,*gidxs; 3191 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3192 PetscInt n_n,nlocals,start,first_index; 3193 PetscMPIInt commsize; 3194 PetscBool first_found; 3195 PetscErrorCode ierr; 3196 3197 PetscFunctionBegin; 3198 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3199 if (subset_mult) { 3200 PetscCheckSameComm(subset,1,subset_mult,2); 3201 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3202 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3203 } 3204 /* create workspace layout for computing global indices of subset */ 3205 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3206 lbounds[0] = lbounds[1] = 0; 3207 for (i=0;i<n;i++) { 3208 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3209 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3210 } 3211 lbounds[0] = -lbounds[0]; 3212 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3213 gbounds[0] = -gbounds[0]; 3214 N = gbounds[1] - gbounds[0] + 1; 3215 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3216 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3217 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3218 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3219 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3220 3221 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3222 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3223 if (subset_mult) { 3224 const PetscInt* idxs_mult; 3225 3226 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3227 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3228 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3229 } else { 3230 for (i=0;i<n;i++) leaf_data[i] = 1; 3231 } 3232 /* local size of new subset */ 3233 n_n = 0; 3234 for (i=0;i<n;i++) n_n += leaf_data[i]; 3235 3236 /* global indexes in layout */ 3237 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3238 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3239 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3240 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3241 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3242 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3243 3244 /* reduce from leaves to roots */ 3245 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3246 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3247 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3248 3249 /* count indexes in local part of layout */ 3250 nlocals = 0; 3251 first_index = -1; 3252 first_found = PETSC_FALSE; 3253 for (i=0;i<Nl;i++) { 3254 if (!first_found && root_data[i]) { 3255 first_found = PETSC_TRUE; 3256 first_index = i; 3257 } 3258 nlocals += root_data[i]; 3259 } 3260 3261 /* cumulative of number of indexes and size of subset without holes */ 3262 start = 0; 3263 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3264 if (N_n) { /* compute total size of new subset if requested */ 3265 *N_n = start + nlocals; 3266 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3267 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3268 } 3269 /* adapt roof data with cumulative */ 3270 if (first_found) { 3271 PetscInt old_index; 3272 3273 root_data[first_index] += start; 3274 old_index = first_index; 3275 for (i=first_index+1;i<Nl;i++) { 3276 if (root_data[i]) { 3277 root_data[i] += root_data[old_index]; 3278 old_index = i; 3279 } 3280 } 3281 } 3282 3283 /* from roots to leaves */ 3284 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3285 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3286 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3287 3288 /* create new IS with global indexes without holes */ 3289 if (subset_mult) { 3290 const PetscInt* idxs_mult; 3291 PetscInt cum; 3292 3293 cum = 0; 3294 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3295 for (i=0;i<n;i++) { 3296 PetscInt j; 3297 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3298 } 3299 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3300 } else { 3301 for (i=0;i<n;i++) { 3302 gidxs[i] = leaf_data[i]-1; 3303 } 3304 } 3305 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3306 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3307 PetscFunctionReturn(0); 3308 } 3309 3310 #undef __FUNCT__ 3311 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3312 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3313 { 3314 PetscInt i,j; 3315 PetscScalar *alphas; 3316 PetscErrorCode ierr; 3317 3318 PetscFunctionBegin; 3319 /* this implements stabilized Gram-Schmidt */ 3320 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3321 for (i=0;i<n;i++) { 3322 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3323 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3324 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3325 } 3326 ierr = PetscFree(alphas);CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 3330 #undef __FUNCT__ 3331 #define __FUNCT__ "MatISGetSubassemblingPattern" 3332 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3333 { 3334 Mat_IS *matis; 3335 IS ranks_send_to; 3336 PetscInt n_neighs,*neighs,*n_shared,**shared; 3337 PetscMPIInt size,rank,color; 3338 PetscInt *xadj,*adjncy; 3339 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3340 PetscInt i,local_size,threshold=0; 3341 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3342 PetscSubcomm subcomm; 3343 PetscErrorCode ierr; 3344 3345 PetscFunctionBegin; 3346 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3347 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3348 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3349 3350 /* Get info on mapping */ 3351 matis = (Mat_IS*)(mat->data); 3352 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3353 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3354 3355 /* build local CSR graph of subdomains' connectivity */ 3356 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3357 xadj[0] = 0; 3358 xadj[1] = PetscMax(n_neighs-1,0); 3359 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3360 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3361 3362 if (threshold) { 3363 PetscInt xadj_count = 0; 3364 for (i=1;i<n_neighs;i++) { 3365 if (n_shared[i] > threshold) { 3366 adjncy[xadj_count] = neighs[i]; 3367 adjncy_wgt[xadj_count] = n_shared[i]; 3368 xadj_count++; 3369 } 3370 } 3371 xadj[1] = xadj_count; 3372 } else { 3373 if (xadj[1]) { 3374 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3375 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3376 } 3377 } 3378 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3379 if (use_square) { 3380 for (i=0;i<xadj[1];i++) { 3381 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3382 } 3383 } 3384 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3385 3386 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3387 3388 /* 3389 Restrict work on active processes only. 3390 */ 3391 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3392 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3393 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3394 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3395 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3396 if (color) { 3397 ierr = PetscFree(xadj);CHKERRQ(ierr); 3398 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3399 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3400 } else { 3401 Mat subdomain_adj; 3402 IS new_ranks,new_ranks_contig; 3403 MatPartitioning partitioner; 3404 PetscInt prank,rstart=0,rend=0; 3405 PetscInt *is_indices,*oldranks; 3406 PetscBool aggregate; 3407 3408 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3409 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3410 prank = rank; 3411 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3412 /* 3413 for (i=0;i<size;i++) { 3414 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3415 } 3416 */ 3417 for (i=0;i<xadj[1];i++) { 3418 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3419 } 3420 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3421 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3422 if (aggregate) { 3423 PetscInt lrows,row,ncols,*cols; 3424 PetscMPIInt nrank; 3425 PetscScalar *vals; 3426 3427 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3428 lrows = 0; 3429 if (nrank<redprocs) { 3430 lrows = size/redprocs; 3431 if (nrank<size%redprocs) lrows++; 3432 } 3433 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,100,NULL,100,NULL,&subdomain_adj);CHKERRQ(ierr); 3434 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3435 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3436 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3437 row = nrank; 3438 ncols = xadj[1]-xadj[0]; 3439 cols = adjncy; 3440 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3441 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3442 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3443 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3444 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3445 ierr = PetscFree(xadj);CHKERRQ(ierr); 3446 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3447 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3448 ierr = PetscFree(vals);CHKERRQ(ierr); 3449 } else { 3450 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3451 } 3452 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3453 3454 /* Partition */ 3455 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3456 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3457 if (use_vwgt) { 3458 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3459 v_wgt[0] = local_size; 3460 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3461 } 3462 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3463 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3464 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3465 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3466 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3467 3468 /* renumber new_ranks to avoid "holes" in new set of processors */ 3469 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3470 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3471 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3472 if (!redprocs) { 3473 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3474 } else { 3475 PetscInt idxs[1]; 3476 PetscMPIInt tag; 3477 MPI_Request *reqs; 3478 3479 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3480 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3481 for (i=rstart;i<rend;i++) { 3482 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3483 } 3484 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3485 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3486 ierr = PetscFree(reqs);CHKERRQ(ierr); 3487 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3488 } 3489 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3490 /* clean up */ 3491 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3492 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3493 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3494 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3495 } 3496 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3497 3498 /* assemble parallel IS for sends */ 3499 i = 1; 3500 if (color) i=0; 3501 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3502 /* get back IS */ 3503 *is_sends = ranks_send_to; 3504 PetscFunctionReturn(0); 3505 } 3506 3507 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3508 3509 #undef __FUNCT__ 3510 #define __FUNCT__ "MatISSubassemble" 3511 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3512 { 3513 Mat local_mat; 3514 Mat_IS *matis; 3515 IS is_sends_internal; 3516 PetscInt rows,cols,new_local_rows; 3517 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3518 PetscBool ismatis,isdense,newisdense,destroy_mat; 3519 ISLocalToGlobalMapping l2gmap; 3520 PetscInt* l2gmap_indices; 3521 const PetscInt* is_indices; 3522 MatType new_local_type; 3523 /* buffers */ 3524 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3525 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3526 PetscInt *recv_buffer_idxs_local; 3527 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3528 /* MPI */ 3529 MPI_Comm comm,comm_n; 3530 PetscSubcomm subcomm; 3531 PetscMPIInt n_sends,n_recvs,commsize; 3532 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3533 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3534 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3535 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3536 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3537 PetscErrorCode ierr; 3538 3539 PetscFunctionBegin; 3540 /* TODO: add missing checks */ 3541 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3542 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3543 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3544 PetscValidLogicalCollectiveInt(mat,nis,7); 3545 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3546 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3547 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3548 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3549 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3550 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3551 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3552 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3553 PetscInt mrows,mcols,mnrows,mncols; 3554 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3555 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3556 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3557 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3558 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3559 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3560 } 3561 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3562 PetscValidLogicalCollectiveInt(mat,bs,0); 3563 /* prepare IS for sending if not provided */ 3564 if (!is_sends) { 3565 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3566 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3567 } else { 3568 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3569 is_sends_internal = is_sends; 3570 } 3571 3572 /* get pointer of MATIS data */ 3573 matis = (Mat_IS*)mat->data; 3574 3575 /* get comm */ 3576 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3577 3578 /* compute number of sends */ 3579 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3580 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3581 3582 /* compute number of receives */ 3583 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3584 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3585 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3586 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3587 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3588 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3589 ierr = PetscFree(iflags);CHKERRQ(ierr); 3590 3591 /* restrict comm if requested */ 3592 subcomm = 0; 3593 destroy_mat = PETSC_FALSE; 3594 if (restrict_comm) { 3595 PetscMPIInt color,subcommsize; 3596 3597 color = 0; 3598 if (restrict_full) { 3599 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3600 } else { 3601 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3602 } 3603 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3604 subcommsize = commsize - subcommsize; 3605 /* check if reuse has been requested */ 3606 if (reuse == MAT_REUSE_MATRIX) { 3607 if (*mat_n) { 3608 PetscMPIInt subcommsize2; 3609 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3610 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3611 comm_n = PetscObjectComm((PetscObject)*mat_n); 3612 } else { 3613 comm_n = PETSC_COMM_SELF; 3614 } 3615 } else { /* MAT_INITIAL_MATRIX */ 3616 PetscMPIInt rank; 3617 3618 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3619 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3620 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3621 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3622 comm_n = PetscSubcommChild(subcomm); 3623 } 3624 /* flag to destroy *mat_n if not significative */ 3625 if (color) destroy_mat = PETSC_TRUE; 3626 } else { 3627 comm_n = comm; 3628 } 3629 3630 /* prepare send/receive buffers */ 3631 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3632 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3633 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3634 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3635 if (nis) { 3636 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3637 } 3638 3639 /* Get data from local matrices */ 3640 if (!isdense) { 3641 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3642 /* TODO: See below some guidelines on how to prepare the local buffers */ 3643 /* 3644 send_buffer_vals should contain the raw values of the local matrix 3645 send_buffer_idxs should contain: 3646 - MatType_PRIVATE type 3647 - PetscInt size_of_l2gmap 3648 - PetscInt global_row_indices[size_of_l2gmap] 3649 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3650 */ 3651 } else { 3652 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3653 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3654 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3655 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3656 send_buffer_idxs[1] = i; 3657 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3658 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3659 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3660 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3661 for (i=0;i<n_sends;i++) { 3662 ilengths_vals[is_indices[i]] = len*len; 3663 ilengths_idxs[is_indices[i]] = len+2; 3664 } 3665 } 3666 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3667 /* additional is (if any) */ 3668 if (nis) { 3669 PetscMPIInt psum; 3670 PetscInt j; 3671 for (j=0,psum=0;j<nis;j++) { 3672 PetscInt plen; 3673 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3674 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3675 psum += len+1; /* indices + lenght */ 3676 } 3677 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3678 for (j=0,psum=0;j<nis;j++) { 3679 PetscInt plen; 3680 const PetscInt *is_array_idxs; 3681 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3682 send_buffer_idxs_is[psum] = plen; 3683 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3684 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3685 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3686 psum += plen+1; /* indices + lenght */ 3687 } 3688 for (i=0;i<n_sends;i++) { 3689 ilengths_idxs_is[is_indices[i]] = psum; 3690 } 3691 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3692 } 3693 3694 buf_size_idxs = 0; 3695 buf_size_vals = 0; 3696 buf_size_idxs_is = 0; 3697 for (i=0;i<n_recvs;i++) { 3698 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3699 buf_size_vals += (PetscInt)olengths_vals[i]; 3700 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3701 } 3702 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3703 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3704 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3705 3706 /* get new tags for clean communications */ 3707 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3708 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3709 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3710 3711 /* allocate for requests */ 3712 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3713 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3714 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3715 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3716 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3717 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3718 3719 /* communications */ 3720 ptr_idxs = recv_buffer_idxs; 3721 ptr_vals = recv_buffer_vals; 3722 ptr_idxs_is = recv_buffer_idxs_is; 3723 for (i=0;i<n_recvs;i++) { 3724 source_dest = onodes[i]; 3725 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3726 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3727 ptr_idxs += olengths_idxs[i]; 3728 ptr_vals += olengths_vals[i]; 3729 if (nis) { 3730 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3731 ptr_idxs_is += olengths_idxs_is[i]; 3732 } 3733 } 3734 for (i=0;i<n_sends;i++) { 3735 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3736 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3737 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3738 if (nis) { 3739 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3740 } 3741 } 3742 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3743 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3744 3745 /* assemble new l2g map */ 3746 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3747 ptr_idxs = recv_buffer_idxs; 3748 new_local_rows = 0; 3749 for (i=0;i<n_recvs;i++) { 3750 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3751 ptr_idxs += olengths_idxs[i]; 3752 } 3753 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3754 ptr_idxs = recv_buffer_idxs; 3755 new_local_rows = 0; 3756 for (i=0;i<n_recvs;i++) { 3757 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3758 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3759 ptr_idxs += olengths_idxs[i]; 3760 } 3761 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3762 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3763 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3764 3765 /* infer new local matrix type from received local matrices type */ 3766 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3767 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3768 if (n_recvs) { 3769 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3770 ptr_idxs = recv_buffer_idxs; 3771 for (i=0;i<n_recvs;i++) { 3772 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3773 new_local_type_private = MATAIJ_PRIVATE; 3774 break; 3775 } 3776 ptr_idxs += olengths_idxs[i]; 3777 } 3778 switch (new_local_type_private) { 3779 case MATDENSE_PRIVATE: 3780 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3781 new_local_type = MATSEQAIJ; 3782 bs = 1; 3783 } else { /* if I receive only 1 dense matrix */ 3784 new_local_type = MATSEQDENSE; 3785 bs = 1; 3786 } 3787 break; 3788 case MATAIJ_PRIVATE: 3789 new_local_type = MATSEQAIJ; 3790 bs = 1; 3791 break; 3792 case MATBAIJ_PRIVATE: 3793 new_local_type = MATSEQBAIJ; 3794 break; 3795 case MATSBAIJ_PRIVATE: 3796 new_local_type = MATSEQSBAIJ; 3797 break; 3798 default: 3799 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3800 break; 3801 } 3802 } else { /* by default, new_local_type is seqdense */ 3803 new_local_type = MATSEQDENSE; 3804 bs = 1; 3805 } 3806 3807 /* create MATIS object if needed */ 3808 if (reuse == MAT_INITIAL_MATRIX) { 3809 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3810 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3811 } else { 3812 /* it also destroys the local matrices */ 3813 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3814 } 3815 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3816 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3817 3818 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3819 3820 /* Global to local map of received indices */ 3821 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3822 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3823 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3824 3825 /* restore attributes -> type of incoming data and its size */ 3826 buf_size_idxs = 0; 3827 for (i=0;i<n_recvs;i++) { 3828 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3829 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3830 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3831 } 3832 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3833 3834 /* set preallocation */ 3835 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3836 if (!newisdense) { 3837 PetscInt *new_local_nnz=0; 3838 3839 ptr_vals = recv_buffer_vals; 3840 ptr_idxs = recv_buffer_idxs_local; 3841 if (n_recvs) { 3842 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3843 } 3844 for (i=0;i<n_recvs;i++) { 3845 PetscInt j; 3846 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3847 for (j=0;j<*(ptr_idxs+1);j++) { 3848 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3849 } 3850 } else { 3851 /* TODO */ 3852 } 3853 ptr_idxs += olengths_idxs[i]; 3854 } 3855 if (new_local_nnz) { 3856 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3857 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3858 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3859 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3860 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3861 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3862 } else { 3863 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3864 } 3865 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3866 } else { 3867 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3868 } 3869 3870 /* set values */ 3871 ptr_vals = recv_buffer_vals; 3872 ptr_idxs = recv_buffer_idxs_local; 3873 for (i=0;i<n_recvs;i++) { 3874 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3875 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3876 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3877 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3878 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3879 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3880 } else { 3881 /* TODO */ 3882 } 3883 ptr_idxs += olengths_idxs[i]; 3884 ptr_vals += olengths_vals[i]; 3885 } 3886 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3887 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3888 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3889 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3890 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3891 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3892 3893 #if 0 3894 if (!restrict_comm) { /* check */ 3895 Vec lvec,rvec; 3896 PetscReal infty_error; 3897 3898 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3899 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3900 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3901 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3902 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3903 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3904 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3905 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3906 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3907 } 3908 #endif 3909 3910 /* assemble new additional is (if any) */ 3911 if (nis) { 3912 PetscInt **temp_idxs,*count_is,j,psum; 3913 3914 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3915 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3916 ptr_idxs = recv_buffer_idxs_is; 3917 psum = 0; 3918 for (i=0;i<n_recvs;i++) { 3919 for (j=0;j<nis;j++) { 3920 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3921 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3922 psum += plen; 3923 ptr_idxs += plen+1; /* shift pointer to received data */ 3924 } 3925 } 3926 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3927 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3928 for (i=1;i<nis;i++) { 3929 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3930 } 3931 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3932 ptr_idxs = recv_buffer_idxs_is; 3933 for (i=0;i<n_recvs;i++) { 3934 for (j=0;j<nis;j++) { 3935 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3936 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3937 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3938 ptr_idxs += plen+1; /* shift pointer to received data */ 3939 } 3940 } 3941 for (i=0;i<nis;i++) { 3942 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3943 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3944 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3945 } 3946 ierr = PetscFree(count_is);CHKERRQ(ierr); 3947 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3948 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3949 } 3950 /* free workspace */ 3951 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3952 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3953 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3954 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3955 if (isdense) { 3956 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3957 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3958 } else { 3959 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3960 } 3961 if (nis) { 3962 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3963 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3964 } 3965 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3966 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3967 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3968 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3969 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3970 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3971 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3972 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3973 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3974 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3975 ierr = PetscFree(onodes);CHKERRQ(ierr); 3976 if (nis) { 3977 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3978 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3979 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3980 } 3981 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3982 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3983 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3984 for (i=0;i<nis;i++) { 3985 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3986 } 3987 *mat_n = NULL; 3988 } 3989 PetscFunctionReturn(0); 3990 } 3991 3992 /* temporary hack into ksp private data structure */ 3993 #include <petsc/private/kspimpl.h> 3994 3995 #undef __FUNCT__ 3996 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3997 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3998 { 3999 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4000 PC_IS *pcis = (PC_IS*)pc->data; 4001 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4002 MatNullSpace CoarseNullSpace=NULL; 4003 ISLocalToGlobalMapping coarse_islg; 4004 IS coarse_is,*isarray; 4005 PetscInt i,im_active=-1,active_procs=-1; 4006 PetscInt nis,nisdofs,nisneu; 4007 PC pc_temp; 4008 PCType coarse_pc_type; 4009 KSPType coarse_ksp_type; 4010 PetscBool multilevel_requested,multilevel_allowed; 4011 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4012 Mat t_coarse_mat_is; 4013 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4014 PetscMPIInt all_procs; 4015 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4016 PetscBool compute_vecs = PETSC_FALSE; 4017 PetscScalar *array; 4018 PetscErrorCode ierr; 4019 4020 PetscFunctionBegin; 4021 /* Assign global numbering to coarse dofs */ 4022 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4023 PetscInt ocoarse_size; 4024 compute_vecs = PETSC_TRUE; 4025 ocoarse_size = pcbddc->coarse_size; 4026 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4027 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4028 /* see if we can avoid some work */ 4029 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4030 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 4031 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4032 coarse_reuse = PETSC_FALSE; 4033 } else { /* we can safely reuse already computed coarse matrix */ 4034 coarse_reuse = PETSC_TRUE; 4035 } 4036 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4037 coarse_reuse = PETSC_FALSE; 4038 } 4039 /* reset any subassembling information */ 4040 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4041 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4042 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4043 coarse_reuse = PETSC_TRUE; 4044 } 4045 4046 /* count "active" (i.e. with positive local size) and "void" processes */ 4047 im_active = !!(pcis->n); 4048 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4049 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4050 void_procs = all_procs-active_procs; 4051 csin_type_simple = PETSC_TRUE; 4052 redist = PETSC_FALSE; 4053 if (pcbddc->current_level && void_procs) { 4054 csin_ml = PETSC_TRUE; 4055 ncoarse_ml = void_procs; 4056 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4057 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4058 csin_ds = PETSC_TRUE; 4059 ncoarse_ds = pcbddc->redistribute_coarse; 4060 redist = PETSC_TRUE; 4061 } else { 4062 csin_ds = PETSC_TRUE; 4063 ncoarse_ds = active_procs; 4064 redist = PETSC_TRUE; 4065 } 4066 } else { 4067 csin_ml = PETSC_FALSE; 4068 ncoarse_ml = all_procs; 4069 if (void_procs) { 4070 csin_ds = PETSC_TRUE; 4071 ncoarse_ds = void_procs; 4072 csin_type_simple = PETSC_FALSE; 4073 } else { 4074 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4075 csin_ds = PETSC_TRUE; 4076 ncoarse_ds = pcbddc->redistribute_coarse; 4077 redist = PETSC_TRUE; 4078 } else { 4079 csin_ds = PETSC_FALSE; 4080 ncoarse_ds = all_procs; 4081 } 4082 } 4083 } 4084 4085 /* 4086 test if we can go multilevel: three conditions must be satisfied: 4087 - we have not exceeded the number of levels requested 4088 - we can actually subassemble the active processes 4089 - we can find a suitable number of MPI processes where we can place the subassembled problem 4090 */ 4091 multilevel_allowed = PETSC_FALSE; 4092 multilevel_requested = PETSC_FALSE; 4093 if (pcbddc->current_level < pcbddc->max_levels) { 4094 multilevel_requested = PETSC_TRUE; 4095 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4096 multilevel_allowed = PETSC_FALSE; 4097 } else { 4098 multilevel_allowed = PETSC_TRUE; 4099 } 4100 } 4101 /* determine number of process partecipating to coarse solver */ 4102 if (multilevel_allowed) { 4103 ncoarse = ncoarse_ml; 4104 csin = csin_ml; 4105 redist = PETSC_FALSE; 4106 } else { 4107 ncoarse = ncoarse_ds; 4108 csin = csin_ds; 4109 } 4110 4111 /* creates temporary l2gmap and IS for coarse indexes */ 4112 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4113 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4114 4115 /* creates temporary MATIS object for coarse matrix */ 4116 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4117 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4118 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4119 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4120 #if 0 4121 { 4122 PetscViewer viewer; 4123 char filename[256]; 4124 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4125 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4126 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4127 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4128 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4129 } 4130 #endif 4131 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4132 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4133 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4134 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4135 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4136 4137 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4138 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4139 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4140 const PetscInt *idxs; 4141 ISLocalToGlobalMapping tmap; 4142 4143 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4144 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4145 /* allocate space for temporary storage */ 4146 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4147 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4148 /* allocate for IS array */ 4149 nisdofs = pcbddc->n_ISForDofsLocal; 4150 nisneu = !!pcbddc->NeumannBoundariesLocal; 4151 nis = nisdofs + nisneu; 4152 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4153 /* dofs splitting */ 4154 for (i=0;i<nisdofs;i++) { 4155 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4156 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4157 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4158 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4159 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4160 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4161 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4162 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4163 } 4164 /* neumann boundaries */ 4165 if (pcbddc->NeumannBoundariesLocal) { 4166 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4167 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4168 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4169 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4170 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4171 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4172 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4173 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4174 } 4175 /* free memory */ 4176 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4177 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4178 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4179 } else { 4180 nis = 0; 4181 nisdofs = 0; 4182 nisneu = 0; 4183 isarray = NULL; 4184 } 4185 /* destroy no longer needed map */ 4186 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4187 4188 /* restrict on coarse candidates (if needed) */ 4189 coarse_mat_is = NULL; 4190 if (csin) { 4191 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4192 if (redist) { 4193 PetscMPIInt rank; 4194 PetscInt spc,n_spc_p1,dest[1],destsize; 4195 4196 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4197 spc = active_procs/ncoarse; 4198 n_spc_p1 = active_procs%ncoarse; 4199 if (im_active) { 4200 destsize = 1; 4201 if (rank > n_spc_p1*(spc+1)-1) { 4202 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4203 } else { 4204 dest[0] = rank/(spc+1); 4205 } 4206 } else { 4207 destsize = 0; 4208 } 4209 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4210 } else if (csin_type_simple) { 4211 PetscMPIInt rank; 4212 PetscInt issize,isidx; 4213 4214 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4215 if (im_active) { 4216 issize = 1; 4217 isidx = (PetscInt)rank; 4218 } else { 4219 issize = 0; 4220 isidx = -1; 4221 } 4222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4223 } else { /* get a suitable subassembling pattern from MATIS code */ 4224 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4225 } 4226 4227 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4228 if (!redist || ncoarse <= void_procs) { 4229 PetscInt ncoarse_cand,tissize,*nisindices; 4230 PetscInt *coarse_candidates; 4231 const PetscInt* tisindices; 4232 4233 /* get coarse candidates' ranks in pc communicator */ 4234 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4235 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4236 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4237 if (!coarse_candidates[i]) { 4238 coarse_candidates[ncoarse_cand++]=i; 4239 } 4240 } 4241 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4242 4243 4244 if (pcbddc->dbg_flag) { 4245 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4246 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4247 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4248 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4249 for (i=0;i<ncoarse_cand;i++) { 4250 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4251 } 4252 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4253 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4254 } 4255 /* shift the pattern on coarse candidates */ 4256 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4257 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4258 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4259 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4260 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4261 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4262 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4263 } 4264 if (pcbddc->dbg_flag) { 4265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4267 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4268 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4269 } 4270 } 4271 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4272 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4273 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4274 } else { /* this is the last level, so use just receiving processes in subcomm */ 4275 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4276 } 4277 } else { 4278 if (pcbddc->dbg_flag) { 4279 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4280 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4281 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4282 } 4283 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4284 coarse_mat_is = t_coarse_mat_is; 4285 } 4286 4287 /* create local to global scatters for coarse problem */ 4288 if (compute_vecs) { 4289 PetscInt lrows; 4290 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4291 if (coarse_mat_is) { 4292 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4293 } else { 4294 lrows = 0; 4295 } 4296 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4297 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4298 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4299 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4300 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4301 } 4302 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4303 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4304 4305 /* set defaults for coarse KSP and PC */ 4306 if (multilevel_allowed) { 4307 coarse_ksp_type = KSPRICHARDSON; 4308 coarse_pc_type = PCBDDC; 4309 } else { 4310 coarse_ksp_type = KSPPREONLY; 4311 coarse_pc_type = PCREDUNDANT; 4312 } 4313 4314 /* print some info if requested */ 4315 if (pcbddc->dbg_flag) { 4316 if (!multilevel_allowed) { 4317 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4318 if (multilevel_requested) { 4319 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4320 } else if (pcbddc->max_levels) { 4321 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4322 } 4323 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4324 } 4325 } 4326 4327 /* create the coarse KSP object only once with defaults */ 4328 if (coarse_mat_is) { 4329 MatReuse coarse_mat_reuse; 4330 PetscViewer dbg_viewer = NULL; 4331 if (pcbddc->dbg_flag) { 4332 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4333 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4334 } 4335 if (!pcbddc->coarse_ksp) { 4336 char prefix[256],str_level[16]; 4337 size_t len; 4338 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4339 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4340 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4341 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4342 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4343 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4344 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4345 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4346 /* prefix */ 4347 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4348 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4349 if (!pcbddc->current_level) { 4350 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4351 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4352 } else { 4353 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4354 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4355 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4356 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4357 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4358 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4359 } 4360 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4361 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4362 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4363 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4364 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4365 /* allow user customization */ 4366 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4367 } 4368 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4369 if (nisdofs) { 4370 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4371 for (i=0;i<nisdofs;i++) { 4372 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4373 } 4374 } 4375 if (nisneu) { 4376 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4377 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4378 } 4379 4380 /* get some info after set from options */ 4381 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4382 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4383 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4384 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4385 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4386 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4387 isbddc = PETSC_FALSE; 4388 } 4389 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4390 if (isredundant) { 4391 KSP inner_ksp; 4392 PC inner_pc; 4393 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4394 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4395 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4396 } 4397 4398 /* assemble coarse matrix */ 4399 if (coarse_reuse) { 4400 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4401 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4402 coarse_mat_reuse = MAT_REUSE_MATRIX; 4403 } else { 4404 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4405 } 4406 if (isbddc || isnn) { 4407 if (pcbddc->coarsening_ratio > 1) { 4408 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4409 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4410 if (pcbddc->dbg_flag) { 4411 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4412 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4413 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4414 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4415 } 4416 } 4417 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4418 } else { 4419 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4420 coarse_mat = coarse_mat_is; 4421 } 4422 } else { 4423 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4424 } 4425 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4426 4427 /* propagate symmetry info of coarse matrix */ 4428 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4429 if (pc->pmat->symmetric_set) { 4430 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4431 } 4432 if (pc->pmat->hermitian_set) { 4433 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4434 } 4435 if (pc->pmat->spd_set) { 4436 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4437 } 4438 /* set operators */ 4439 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4440 if (pcbddc->dbg_flag) { 4441 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4442 } 4443 } else { /* processes non partecipating to coarse solver (if any) */ 4444 coarse_mat = 0; 4445 } 4446 ierr = PetscFree(isarray);CHKERRQ(ierr); 4447 #if 0 4448 { 4449 PetscViewer viewer; 4450 char filename[256]; 4451 sprintf(filename,"coarse_mat.m"); 4452 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4453 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4454 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4455 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4456 } 4457 #endif 4458 4459 /* Compute coarse null space (special handling by BDDC only) */ 4460 if (pcbddc->NullSpace) { 4461 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4462 } 4463 4464 if (pcbddc->coarse_ksp) { 4465 Vec crhs,csol; 4466 PetscBool ispreonly; 4467 4468 if (CoarseNullSpace) { 4469 if (isbddc) { 4470 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4471 } else { 4472 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4473 } 4474 } 4475 /* setup coarse ksp */ 4476 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4477 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4478 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4479 /* hack */ 4480 if (!csol) { 4481 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4482 } 4483 if (!crhs) { 4484 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4485 } 4486 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4487 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4488 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4489 KSP check_ksp; 4490 KSPType check_ksp_type; 4491 PC check_pc; 4492 Vec check_vec,coarse_vec; 4493 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4494 PetscInt its; 4495 PetscBool compute_eigs; 4496 PetscReal *eigs_r,*eigs_c; 4497 PetscInt neigs; 4498 const char *prefix; 4499 4500 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4501 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4502 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4503 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4504 if (ispreonly) { 4505 check_ksp_type = KSPPREONLY; 4506 compute_eigs = PETSC_FALSE; 4507 } else { 4508 check_ksp_type = KSPGMRES; 4509 compute_eigs = PETSC_TRUE; 4510 } 4511 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4512 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4513 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4514 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4515 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4516 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4517 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4518 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4519 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4520 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4521 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4522 /* create random vec */ 4523 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4524 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4525 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4526 if (CoarseNullSpace) { 4527 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4528 } 4529 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4530 /* solve coarse problem */ 4531 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4532 if (CoarseNullSpace) { 4533 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4534 } 4535 /* set eigenvalue estimation if preonly has not been requested */ 4536 if (compute_eigs) { 4537 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4538 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4539 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4540 lambda_max = eigs_r[neigs-1]; 4541 lambda_min = eigs_r[0]; 4542 if (pcbddc->use_coarse_estimates) { 4543 if (lambda_max>lambda_min) { 4544 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4545 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4546 } 4547 } 4548 } 4549 4550 /* check coarse problem residual error */ 4551 if (pcbddc->dbg_flag) { 4552 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4553 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4554 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4555 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4556 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4557 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4558 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4559 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4560 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4561 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4562 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4563 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4564 if (compute_eigs) { 4565 PetscReal lambda_max_s,lambda_min_s; 4566 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4567 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4568 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4569 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4570 for (i=0;i<neigs;i++) { 4571 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4572 } 4573 } 4574 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4575 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4576 } 4577 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4578 if (compute_eigs) { 4579 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4580 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4581 } 4582 } 4583 } 4584 /* print additional info */ 4585 if (pcbddc->dbg_flag) { 4586 /* waits until all processes reaches this point */ 4587 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4589 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4590 } 4591 4592 /* free memory */ 4593 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4594 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4595 PetscFunctionReturn(0); 4596 } 4597 4598 #undef __FUNCT__ 4599 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4600 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4601 { 4602 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4603 PC_IS* pcis = (PC_IS*)pc->data; 4604 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4605 IS subset,subset_mult,subset_n; 4606 PetscInt local_size,coarse_size=0; 4607 PetscInt *local_primal_indices=NULL; 4608 const PetscInt *t_local_primal_indices; 4609 PetscErrorCode ierr; 4610 4611 PetscFunctionBegin; 4612 /* Compute global number of coarse dofs */ 4613 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4614 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4615 } 4616 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4617 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,subset_n,&subset);CHKERRQ(ierr); 4618 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4619 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4620 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4621 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4622 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4623 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4624 if (local_size != pcbddc->local_primal_size) { 4625 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4626 } 4627 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4628 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4629 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4630 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4631 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4632 4633 /* check numbering */ 4634 if (pcbddc->dbg_flag) { 4635 PetscScalar coarsesum,*array; 4636 PetscInt i; 4637 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4638 4639 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4640 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4641 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4642 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4643 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4644 for (i=0;i<pcbddc->local_primal_size;i++) { 4645 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4646 } 4647 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4648 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4649 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4650 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4651 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4652 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4653 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4654 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4655 for (i=0;i<pcis->n;i++) { 4656 if (array[i] == 1.0) { 4657 set_error = PETSC_TRUE; 4658 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4659 } 4660 } 4661 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4662 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4663 for (i=0;i<pcis->n;i++) { 4664 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4665 } 4666 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4667 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4668 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4669 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4670 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4671 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4672 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4673 PetscInt *gidxs; 4674 4675 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4676 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4677 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4678 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4679 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4680 for (i=0;i<pcbddc->local_primal_size;i++) { 4681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4682 } 4683 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4684 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4685 } 4686 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4687 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4688 } 4689 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4690 /* get back data */ 4691 *coarse_size_n = coarse_size; 4692 *local_primal_indices_n = local_primal_indices; 4693 PetscFunctionReturn(0); 4694 } 4695 4696 #undef __FUNCT__ 4697 #define __FUNCT__ "PCBDDCGlobalToLocal" 4698 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4699 { 4700 IS localis_t; 4701 PetscInt i,lsize,*idxs,n; 4702 PetscScalar *vals; 4703 PetscErrorCode ierr; 4704 4705 PetscFunctionBegin; 4706 /* get indices in local ordering exploiting local to global map */ 4707 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4708 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4709 for (i=0;i<lsize;i++) vals[i] = 1.0; 4710 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4711 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4712 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4713 if (idxs) { /* multilevel guard */ 4714 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4715 } 4716 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4717 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4718 ierr = PetscFree(vals);CHKERRQ(ierr); 4719 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4720 /* now compute set in local ordering */ 4721 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4722 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4723 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4724 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4725 for (i=0,lsize=0;i<n;i++) { 4726 if (PetscRealPart(vals[i]) > 0.5) { 4727 lsize++; 4728 } 4729 } 4730 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4731 for (i=0,lsize=0;i<n;i++) { 4732 if (PetscRealPart(vals[i]) > 0.5) { 4733 idxs[lsize++] = i; 4734 } 4735 } 4736 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4737 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4738 *localis = localis_t; 4739 PetscFunctionReturn(0); 4740 } 4741 4742 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4743 #undef __FUNCT__ 4744 #define __FUNCT__ "PCBDDCMatMult_Private" 4745 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4746 { 4747 PCBDDCChange_ctx change_ctx; 4748 PetscErrorCode ierr; 4749 4750 PetscFunctionBegin; 4751 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4752 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4753 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4754 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4755 PetscFunctionReturn(0); 4756 } 4757 4758 #undef __FUNCT__ 4759 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4760 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4761 { 4762 PCBDDCChange_ctx change_ctx; 4763 PetscErrorCode ierr; 4764 4765 PetscFunctionBegin; 4766 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4767 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4768 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4769 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4770 PetscFunctionReturn(0); 4771 } 4772 4773 #undef __FUNCT__ 4774 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4775 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4776 { 4777 PC_IS *pcis=(PC_IS*)pc->data; 4778 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4779 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4780 Mat S_j; 4781 PetscInt *used_xadj,*used_adjncy; 4782 PetscBool free_used_adj; 4783 PetscErrorCode ierr; 4784 4785 PetscFunctionBegin; 4786 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4787 free_used_adj = PETSC_FALSE; 4788 if (pcbddc->sub_schurs_layers == -1) { 4789 used_xadj = NULL; 4790 used_adjncy = NULL; 4791 } else { 4792 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4793 used_xadj = pcbddc->mat_graph->xadj; 4794 used_adjncy = pcbddc->mat_graph->adjncy; 4795 } else if (pcbddc->computed_rowadj) { 4796 used_xadj = pcbddc->mat_graph->xadj; 4797 used_adjncy = pcbddc->mat_graph->adjncy; 4798 } else { 4799 PetscBool flg_row=PETSC_FALSE; 4800 const PetscInt *xadj,*adjncy; 4801 PetscInt nvtxs; 4802 4803 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4804 if (flg_row) { 4805 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4806 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4807 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4808 free_used_adj = PETSC_TRUE; 4809 } else { 4810 pcbddc->sub_schurs_layers = -1; 4811 used_xadj = NULL; 4812 used_adjncy = NULL; 4813 } 4814 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4815 } 4816 } 4817 4818 /* setup sub_schurs data */ 4819 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4820 if (!sub_schurs->use_mumps) { 4821 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4822 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4823 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4824 } else { 4825 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4826 PetscBool isseqaij; 4827 if (!pcbddc->use_vertices && reuse_solvers) { 4828 PetscInt n_vertices; 4829 4830 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4831 reuse_solvers = (PetscBool)!n_vertices; 4832 } 4833 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4834 if (!isseqaij) { 4835 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4836 if (matis->A == pcbddc->local_mat) { 4837 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4838 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4839 } else { 4840 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4841 } 4842 } 4843 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4844 } 4845 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4846 4847 /* free adjacency */ 4848 if (free_used_adj) { 4849 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4850 } 4851 PetscFunctionReturn(0); 4852 } 4853 4854 #undef __FUNCT__ 4855 #define __FUNCT__ "PCBDDCInitSubSchurs" 4856 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4857 { 4858 PC_IS *pcis=(PC_IS*)pc->data; 4859 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4860 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4861 PCBDDCGraph graph; 4862 PetscErrorCode ierr; 4863 4864 PetscFunctionBegin; 4865 /* attach interface graph for determining subsets */ 4866 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4867 IS verticesIS,verticescomm; 4868 PetscInt vsize,*idxs; 4869 4870 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4871 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4872 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4873 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4874 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4875 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4876 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4877 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4878 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4879 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4880 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4881 /* 4882 if (pcbddc->dbg_flag) { 4883 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4884 } 4885 */ 4886 } else { 4887 graph = pcbddc->mat_graph; 4888 } 4889 4890 /* sub_schurs init */ 4891 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4892 4893 /* free graph struct */ 4894 if (pcbddc->sub_schurs_rebuild) { 4895 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4896 } 4897 PetscFunctionReturn(0); 4898 } 4899