1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 #if defined(PETSC_USE_COMPLEX) 92 rwork = NULL; 93 #endif 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 155 const PetscInt *idxs; 156 PetscReal infty = PETSC_MAX_REAL; 157 PetscInt j,subset_size,eigs_start = 0; 158 PetscBLASInt B_N; 159 PetscBool same_data = PETSC_FALSE; 160 161 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 162 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 163 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 164 if (sub_schurs->is_hermitian) { 165 PetscInt j,k; 166 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 167 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 168 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 for (k=j;k<subset_size;k++) { 172 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 173 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 174 } 175 } 176 } else { 177 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 178 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 179 } 180 } else { 181 S = Sarray + cumarray; 182 St = Starray + cumarray; 183 } 184 185 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 186 /* see if we can save some work */ 187 if (sub_schurs->n_subs == 1) { 188 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 189 } 190 191 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 192 B_neigs = 0; 193 } else { 194 /* Threshold: this is an heuristic for edges */ 195 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 196 197 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 198 PetscBLASInt B_itype = 1; 199 PetscBLASInt B_IL, B_IU; 200 PetscReal eps = -1.0; /* dlamch? */ 201 PetscInt nmin_s; 202 203 /* ask for eigenvalues larger than thresh */ 204 if (pcbddc->dbg_flag) { 205 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 206 } 207 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 208 #if defined(PETSC_USE_COMPLEX) 209 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 210 #else 211 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 212 #endif 213 ierr = PetscFPTrapPop();CHKERRQ(ierr); 214 if (B_ierr) { 215 if (B_ierr < 0 ) { 216 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 217 } else if (B_ierr <= B_N) { 218 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 219 } else { 220 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 221 } 222 } 223 224 if (B_neigs > nmax) { 225 if (pcbddc->dbg_flag) { 226 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 227 } 228 eigs_start = B_neigs -nmax; 229 B_neigs = nmax; 230 } 231 232 nmin_s = PetscMin(nmin,B_N); 233 if (B_neigs < nmin_s) { 234 PetscBLASInt B_neigs2; 235 236 B_IU = B_N - B_neigs; 237 B_IL = B_N - nmin_s + 1; 238 if (pcbddc->dbg_flag) { 239 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 240 } 241 if (sub_schurs->is_hermitian) { 242 PetscInt j; 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 for (j=0;j<subset_size;j++) { 247 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 248 } 249 } else { 250 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 251 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 } 253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 254 #if defined(PETSC_USE_COMPLEX) 255 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 256 #else 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 258 #endif 259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 260 B_neigs += B_neigs2; 261 } 262 if (B_ierr) { 263 if (B_ierr < 0 ) { 264 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 265 } else if (B_ierr <= B_N) { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 267 } else { 268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 269 } 270 } 271 if (pcbddc->dbg_flag) { 272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 273 for (j=0;j<B_neigs;j++) { 274 if (eigs[j] == 0.0) { 275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 276 } else { 277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 278 } 279 } 280 } 281 } else { 282 /* TODO */ 283 } 284 } 285 maxneigs = PetscMax(B_neigs,maxneigs); 286 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 287 if (B_neigs) { 288 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 289 290 if (pcbddc->dbg_flag > 1) { 291 PetscInt ii; 292 for (ii=0;ii<B_neigs;ii++) { 293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 294 for (j=0;j<B_N;j++) { 295 #if defined(PETSC_USE_COMPLEX) 296 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 297 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 298 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 299 #else 300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 301 #endif 302 } 303 } 304 } 305 #if 0 306 for (j=0;j<B_neigs;j++) { 307 PetscBLASInt Blas_N,Blas_one = 1.0; 308 PetscScalar norm; 309 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 310 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 311 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 312 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 313 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 314 } else { 315 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 316 } 317 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 318 } 319 #endif 320 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 321 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 322 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 323 cum++; 324 } 325 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 326 /* shift for next computation */ 327 cumarray += subset_size*subset_size; 328 } 329 if (pcbddc->dbg_flag) { 330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 331 } 332 333 if (mss) { 334 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 335 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 336 /* destroy matrices (junk) */ 337 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 338 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 339 } 340 if (allocated_S_St) { 341 ierr = PetscFree2(S,St);CHKERRQ(ierr); 342 } 343 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 344 #if defined(PETSC_USE_COMPLEX) 345 ierr = PetscFree(rwork);CHKERRQ(ierr); 346 #endif 347 if (pcbddc->dbg_flag) { 348 PetscInt maxneigs_r; 349 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 350 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 351 } 352 PetscFunctionReturn(0); 353 } 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCBDDCSetUpSolvers" 357 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 358 { 359 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 360 PetscScalar *coarse_submat_vals; 361 PetscErrorCode ierr; 362 363 PetscFunctionBegin; 364 /* Setup local scatters R_to_B and (optionally) R_to_D */ 365 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 366 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 367 368 /* Setup local neumann solver ksp_R */ 369 /* PCBDDCSetUpLocalScatters should be called first! */ 370 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 371 372 /* Change global null space passed in by the user if change of basis has been requested */ 373 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 374 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 375 } 376 377 /* 378 Setup local correction and local part of coarse basis. 379 Gives back the dense local part of the coarse matrix in column major ordering 380 */ 381 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 382 383 /* Compute total number of coarse nodes and setup coarse solver */ 384 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 385 386 /* free */ 387 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "PCBDDCResetCustomization" 393 PetscErrorCode PCBDDCResetCustomization(PC pc) 394 { 395 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 401 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 402 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 403 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 405 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 406 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 407 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 408 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 409 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetTopography" 415 PetscErrorCode PCBDDCResetTopography(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 422 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 423 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 424 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 425 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCResetSolvers" 431 PetscErrorCode PCBDDCResetSolvers(PC pc) 432 { 433 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *array; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 439 if (pcbddc->coarse_phi_B) { 440 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 441 ierr = PetscFree(array);CHKERRQ(ierr); 442 } 443 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 445 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 447 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 449 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 450 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 451 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 452 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 454 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 455 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 456 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 457 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 458 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 459 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 461 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 462 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 463 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 464 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 465 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 466 ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr); 467 ierr = VecDestroy(&pcbddc->saddle_sums);CHKERRQ(ierr); 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNCT__ 472 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 473 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 474 { 475 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 476 PC_IS *pcis = (PC_IS*)pc->data; 477 VecType impVecType; 478 PetscInt n_constraints,n_R,old_size; 479 PetscErrorCode ierr; 480 481 PetscFunctionBegin; 482 if (!pcbddc->ConstraintMatrix) { 483 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 484 } 485 /* get sizes */ 486 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 487 n_R = pcis->n-pcbddc->n_vertices; 488 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 489 /* local work vectors (try to avoid unneeded work)*/ 490 /* R nodes */ 491 old_size = -1; 492 if (pcbddc->vec1_R) { 493 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 494 } 495 if (n_R != old_size) { 496 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 497 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 498 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 499 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 500 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 501 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 502 } 503 /* local primal dofs */ 504 old_size = -1; 505 if (pcbddc->vec1_P) { 506 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 507 } 508 if (pcbddc->local_primal_size != old_size) { 509 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 510 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 512 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 513 } 514 /* local explicit constraints */ 515 old_size = -1; 516 if (pcbddc->vec1_C) { 517 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 518 } 519 if (n_constraints && n_constraints != old_size) { 520 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 521 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 522 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 523 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 524 } 525 PetscFunctionReturn(0); 526 } 527 528 #undef __FUNCT__ 529 #define __FUNCT__ "PCBDDCSetUpCorrection" 530 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 531 { 532 PetscErrorCode ierr; 533 /* pointers to pcis and pcbddc */ 534 PC_IS* pcis = (PC_IS*)pc->data; 535 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 536 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 537 /* submatrices of local problem */ 538 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 539 /* submatrices of local coarse problem */ 540 Mat S_VV,S_CV,S_VC,S_CC; 541 /* working matrices */ 542 Mat C_CR; 543 /* additional working stuff */ 544 PC pc_R; 545 Mat F; 546 PetscBool isLU,isCHOL,isILU; 547 548 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 549 PetscScalar *work; 550 PetscInt *idx_V_B; 551 PetscInt n,n_vertices,n_constraints; 552 PetscInt i,n_R,n_D,n_B; 553 PetscBool unsymmetric_check; 554 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 555 MatType impMatType; 556 /* some shortcuts to scalars */ 557 PetscScalar one=1.0,m_one=-1.0; 558 559 PetscFunctionBegin; 560 n_vertices = pcbddc->n_vertices; 561 n_constraints = pcbddc->local_primal_size-n_vertices; 562 /* Set Non-overlapping dimensions */ 563 n_B = pcis->n_B; 564 n_D = pcis->n - n_B; 565 n_R = pcis->n - n_vertices; 566 567 /* Set types for local objects needed by BDDC precondtioner */ 568 impMatType = MATSEQDENSE; 569 570 /* vertices in boundary numbering */ 571 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 572 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 573 if (i != n_vertices) { 574 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 575 } 576 577 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 578 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 580 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 581 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 582 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 583 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 584 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 585 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 586 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 587 588 unsymmetric_check = PETSC_FALSE; 589 /* allocate workspace */ 590 n = 0; 591 if (n_constraints) { 592 n += n_R*n_constraints; 593 } 594 if (n_vertices) { 595 n = PetscMax(2*n_R*n_vertices,n); 596 n = PetscMax((n_R+n_B)*n_vertices,n); 597 } 598 if (!pcbddc->symmetric_primal) { 599 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 600 unsymmetric_check = PETSC_TRUE; 601 } 602 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 603 604 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 605 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 606 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 607 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 608 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 609 if (isLU || isILU || isCHOL) { 610 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 611 } else if (sub_schurs->reuse_mumps) { 612 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 613 MatFactorType type; 614 615 F = reuse_mumps->F; 616 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 617 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 618 } else { 619 F = NULL; 620 } 621 622 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 623 if (n_constraints) { 624 Mat M1,M2,M3; 625 Mat auxmat; 626 IS is_aux; 627 PetscScalar *array,*array2; 628 629 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 630 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 631 632 /* Extract constraints on R nodes: C_{CR} */ 633 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 634 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 635 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 636 637 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 638 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 639 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 640 for (i=0;i<n_constraints;i++) { 641 const PetscScalar *row_cmat_values; 642 const PetscInt *row_cmat_indices; 643 PetscInt size_of_constraint,j; 644 645 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 646 for (j=0;j<size_of_constraint;j++) { 647 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 648 } 649 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 650 } 651 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 652 if (F) { 653 Mat B; 654 655 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 656 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 657 ierr = MatDestroy(&B);CHKERRQ(ierr); 658 } else { 659 PetscScalar *marr; 660 661 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 662 for (i=0;i<n_constraints;i++) { 663 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 664 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 665 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 666 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 667 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 668 } 669 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 670 } 671 if (!pcbddc->switch_static) { 672 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 673 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 674 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 675 for (i=0;i<n_constraints;i++) { 676 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 677 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 678 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 679 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 680 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 681 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 682 } 683 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 684 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 685 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 686 } else { 687 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 688 pcbddc->local_auxmat2 = local_auxmat2_R; 689 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 690 } 691 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 692 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 693 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 694 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 695 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 696 if (isCHOL) { 697 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 698 } else { 699 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 700 } 701 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 702 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 703 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 704 ierr = MatDestroy(&M2);CHKERRQ(ierr); 705 ierr = MatDestroy(&M3);CHKERRQ(ierr); 706 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 707 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 708 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 709 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 710 ierr = MatDestroy(&M1);CHKERRQ(ierr); 711 } 712 /* Get submatrices from subdomain matrix */ 713 if (n_vertices) { 714 IS is_aux; 715 716 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 717 IS tis; 718 719 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 720 ierr = ISSort(tis);CHKERRQ(ierr); 721 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 722 ierr = ISDestroy(&tis);CHKERRQ(ierr); 723 } else { 724 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 725 } 726 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 727 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 728 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 729 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 730 } 731 732 /* Matrix of coarse basis functions (local) */ 733 if (pcbddc->coarse_phi_B) { 734 PetscInt on_B,on_primal,on_D=n_D; 735 if (pcbddc->coarse_phi_D) { 736 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 737 } 738 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 739 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 740 PetscScalar *marray; 741 742 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 743 ierr = PetscFree(marray);CHKERRQ(ierr); 744 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 745 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 746 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 747 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 748 } 749 } 750 751 if (!pcbddc->coarse_phi_B) { 752 PetscScalar *marray; 753 754 n = n_B*pcbddc->local_primal_size; 755 if (pcbddc->switch_static || pcbddc->dbg_flag) { 756 n += n_D*pcbddc->local_primal_size; 757 } 758 if (!pcbddc->symmetric_primal) { 759 n *= 2; 760 } 761 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 762 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 763 n = n_B*pcbddc->local_primal_size; 764 if (pcbddc->switch_static || pcbddc->dbg_flag) { 765 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 766 n += n_D*pcbddc->local_primal_size; 767 } 768 if (!pcbddc->symmetric_primal) { 769 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 770 if (pcbddc->switch_static || pcbddc->dbg_flag) { 771 n = n_B*pcbddc->local_primal_size; 772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 773 } 774 } else { 775 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 776 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 777 if (pcbddc->switch_static || pcbddc->dbg_flag) { 778 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 779 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 780 } 781 } 782 } 783 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 784 /* vertices */ 785 if (n_vertices) { 786 787 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 788 789 if (n_R) { 790 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 791 PetscBLASInt B_N,B_one = 1; 792 PetscScalar *x,*y; 793 PetscBool isseqaij; 794 795 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 796 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 797 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 798 if (F) { /* TODO could be optimized for symmetric problems */ 799 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 800 } else { 801 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 802 for (i=0;i<n_vertices;i++) { 803 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 804 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 805 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 806 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 807 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 808 } 809 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 810 } 811 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 812 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 813 if (n_constraints) { 814 Mat B; 815 816 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 817 for (i=0;i<n_vertices;i++) { 818 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 819 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 820 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 821 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 822 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 823 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 824 } 825 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 826 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 827 ierr = MatDestroy(&B);CHKERRQ(ierr); 828 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 829 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 830 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 831 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 832 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 833 ierr = MatDestroy(&B);CHKERRQ(ierr); 834 } 835 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 836 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 837 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 838 } 839 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 840 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 841 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 842 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 843 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 844 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 845 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 846 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 847 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 848 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 849 } else { 850 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 851 } 852 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 853 /* coarse basis functions */ 854 for (i=0;i<n_vertices;i++) { 855 PetscScalar *y; 856 857 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 858 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 859 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 860 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 y[n_B*i+idx_V_B[i]] = 1.0; 863 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 864 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 865 866 if (pcbddc->switch_static || pcbddc->dbg_flag) { 867 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 868 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 869 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 870 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 872 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 873 } 874 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 875 } 876 /* if n_R == 0 the object is not destroyed */ 877 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 878 } 879 880 if (n_constraints) { 881 Mat B; 882 883 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 884 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 885 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 886 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 887 if (n_vertices) { 888 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 889 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 890 } else { 891 Mat S_VCt; 892 893 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 894 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 895 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 896 } 897 } 898 ierr = MatDestroy(&B);CHKERRQ(ierr); 899 /* coarse basis functions */ 900 for (i=0;i<n_constraints;i++) { 901 PetscScalar *y; 902 903 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 904 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 905 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 906 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 909 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 910 if (pcbddc->switch_static || pcbddc->dbg_flag) { 911 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 912 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 913 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 916 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 917 } 918 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 919 } 920 } 921 if (n_constraints) { 922 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 923 } 924 925 /* compute other basis functions for non-symmetric problems */ 926 if (!pcbddc->symmetric_primal) { 927 928 if (n_constraints) { 929 Mat S_CCT,B_C; 930 931 /* this is a lazy thing */ 932 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 933 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 934 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 935 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 936 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 937 if (n_vertices) { 938 Mat B_V,S_VCT; 939 940 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 941 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 942 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 943 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 944 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 945 } 946 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 947 } else { /* if there are no constraints, reset work */ 948 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 949 } 950 if (n_vertices && n_R) { 951 Mat A_VRT; 952 PetscScalar *marray; 953 PetscBLASInt B_N,B_one = 1; 954 955 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 956 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 957 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 958 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 959 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 960 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 961 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 962 } 963 964 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 965 for (i=0;i<pcbddc->local_primal_size;i++) { 966 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 967 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 968 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 969 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 970 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 971 } 972 } else { 973 for (i=0;i<pcbddc->local_primal_size;i++) { 974 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 975 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 976 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 977 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 978 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 979 } 980 } 981 /* coarse basis functions */ 982 for (i=0;i<pcbddc->local_primal_size;i++) { 983 PetscScalar *y; 984 985 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 986 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 987 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 988 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 if (i<n_vertices) { 991 y[n_B*i+idx_V_B[i]] = 1.0; 992 } 993 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 994 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 995 996 if (pcbddc->switch_static || pcbddc->dbg_flag) { 997 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 998 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 999 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1000 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1002 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1003 } 1004 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1005 } 1006 } 1007 /* free memory */ 1008 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1009 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1011 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1012 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1013 ierr = PetscFree(work);CHKERRQ(ierr); 1014 if (n_vertices) { 1015 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1016 } 1017 if (n_constraints) { 1018 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1019 } 1020 /* Checking coarse_sub_mat and coarse basis functios */ 1021 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1022 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1023 if (pcbddc->dbg_flag) { 1024 Mat coarse_sub_mat; 1025 Mat AUXMAT,TM1,TM2,TM3,TM4; 1026 Mat coarse_phi_D,coarse_phi_B; 1027 Mat coarse_psi_D,coarse_psi_B; 1028 Mat A_II,A_BB,A_IB,A_BI; 1029 Mat C_B,CPHI; 1030 IS is_dummy; 1031 Vec mones; 1032 MatType checkmattype=MATSEQAIJ; 1033 PetscReal real_value; 1034 1035 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1036 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1037 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1038 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1039 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1040 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1041 if (unsymmetric_check) { 1042 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1043 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1044 } 1045 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1046 1047 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1048 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1049 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1050 if (unsymmetric_check) { 1051 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1053 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1054 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1056 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1057 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1059 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1060 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1061 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1062 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1063 } else { 1064 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1065 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1066 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1067 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1068 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1069 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1070 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1071 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1072 } 1073 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1074 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1077 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1078 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1079 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1081 1082 /* check constraints */ 1083 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1084 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1085 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1086 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1087 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1088 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1089 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1090 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1091 if (unsymmetric_check) { 1092 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1093 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1094 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1095 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1096 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1097 } 1098 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1099 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1100 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1101 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1102 1103 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1104 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1105 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1106 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1107 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1108 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1109 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1110 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1111 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1112 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1113 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1114 if (unsymmetric_check) { 1115 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1116 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1117 } 1118 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1119 } 1120 /* get back data */ 1121 *coarse_submat_vals_n = coarse_submat_vals; 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1127 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1128 { 1129 Mat *work_mat; 1130 IS isrow_s,iscol_s; 1131 PetscBool rsorted,csorted; 1132 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1133 PetscErrorCode ierr; 1134 1135 PetscFunctionBegin; 1136 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1137 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1138 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1139 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1140 1141 if (!rsorted) { 1142 const PetscInt *idxs; 1143 PetscInt *idxs_sorted,i; 1144 1145 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1146 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1147 for (i=0;i<rsize;i++) { 1148 idxs_perm_r[i] = i; 1149 } 1150 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1151 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1152 for (i=0;i<rsize;i++) { 1153 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1154 } 1155 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1156 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1157 } else { 1158 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1159 isrow_s = isrow; 1160 } 1161 1162 if (!csorted) { 1163 if (isrow == iscol) { 1164 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1165 iscol_s = isrow_s; 1166 } else { 1167 const PetscInt *idxs; 1168 PetscInt *idxs_sorted,i; 1169 1170 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1171 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1172 for (i=0;i<csize;i++) { 1173 idxs_perm_c[i] = i; 1174 } 1175 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1176 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1177 for (i=0;i<csize;i++) { 1178 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1179 } 1180 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1181 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1182 } 1183 } else { 1184 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1185 iscol_s = iscol; 1186 } 1187 1188 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1189 1190 if (!rsorted || !csorted) { 1191 Mat new_mat; 1192 IS is_perm_r,is_perm_c; 1193 1194 if (!rsorted) { 1195 PetscInt *idxs_r,i; 1196 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1197 for (i=0;i<rsize;i++) { 1198 idxs_r[idxs_perm_r[i]] = i; 1199 } 1200 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1201 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1202 } else { 1203 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1204 } 1205 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1206 1207 if (!csorted) { 1208 if (isrow_s == iscol_s) { 1209 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1210 is_perm_c = is_perm_r; 1211 } else { 1212 PetscInt *idxs_c,i; 1213 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1214 for (i=0;i<csize;i++) { 1215 idxs_c[idxs_perm_c[i]] = i; 1216 } 1217 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1218 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1219 } 1220 } else { 1221 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1222 } 1223 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1224 1225 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1226 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1227 work_mat[0] = new_mat; 1228 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1229 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1230 } 1231 1232 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1233 *B = work_mat[0]; 1234 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1235 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1236 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1242 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1243 { 1244 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1245 PC_IS* pcis = (PC_IS*)pc->data; 1246 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1247 Mat new_mat; 1248 IS is_local,is_global; 1249 PetscInt local_size; 1250 PetscBool isseqaij; 1251 PetscErrorCode ierr; 1252 1253 PetscFunctionBegin; 1254 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1255 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1256 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1257 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1258 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1259 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1260 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1261 1262 /* check */ 1263 if (pcbddc->dbg_flag) { 1264 Vec x,x_change; 1265 PetscReal error; 1266 1267 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1268 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1269 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1270 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1271 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1272 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1273 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1274 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1275 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1276 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1277 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1278 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1279 ierr = VecDestroy(&x);CHKERRQ(ierr); 1280 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1281 } 1282 1283 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1284 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1285 if (isseqaij) { 1286 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1287 } else { 1288 Mat work_mat; 1289 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1290 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1291 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1292 } 1293 if (matis->A->symmetric_set) { 1294 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1295 #if !defined(PETSC_USE_COMPLEX) 1296 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1297 #endif 1298 } 1299 /* 1300 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1301 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1302 */ 1303 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1309 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1310 { 1311 PC_IS* pcis = (PC_IS*)(pc->data); 1312 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1313 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1314 PetscInt *idx_R_local=NULL; 1315 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1316 PetscInt vbs,bs; 1317 PetscBT bitmask=NULL; 1318 PetscErrorCode ierr; 1319 1320 PetscFunctionBegin; 1321 /* 1322 No need to setup local scatters if 1323 - primal space is unchanged 1324 AND 1325 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1326 AND 1327 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1328 */ 1329 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1330 PetscFunctionReturn(0); 1331 } 1332 /* destroy old objects */ 1333 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1334 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1335 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1336 /* Set Non-overlapping dimensions */ 1337 n_B = pcis->n_B; 1338 n_D = pcis->n - n_B; 1339 n_vertices = pcbddc->n_vertices; 1340 1341 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1342 1343 /* create auxiliary bitmask and allocate workspace */ 1344 if (!sub_schurs->reuse_mumps) { 1345 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1346 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1347 for (i=0;i<n_vertices;i++) { 1348 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1349 } 1350 1351 for (i=0, n_R=0; i<pcis->n; i++) { 1352 if (!PetscBTLookup(bitmask,i)) { 1353 idx_R_local[n_R++] = i; 1354 } 1355 } 1356 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1357 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1358 1359 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1360 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1361 } 1362 1363 /* Block code */ 1364 vbs = 1; 1365 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1366 if (bs>1 && !(n_vertices%bs)) { 1367 PetscBool is_blocked = PETSC_TRUE; 1368 PetscInt *vary; 1369 if (!sub_schurs->reuse_mumps) { 1370 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1371 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1372 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1373 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1374 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1375 for (i=0; i<pcis->n/bs; i++) { 1376 if (vary[i]!=0 && vary[i]!=bs) { 1377 is_blocked = PETSC_FALSE; 1378 break; 1379 } 1380 } 1381 ierr = PetscFree(vary);CHKERRQ(ierr); 1382 } else { 1383 /* Verify directly the R set */ 1384 for (i=0; i<n_R/bs; i++) { 1385 PetscInt j,node=idx_R_local[bs*i]; 1386 for (j=1; j<bs; j++) { 1387 if (node != idx_R_local[bs*i+j]-j) { 1388 is_blocked = PETSC_FALSE; 1389 break; 1390 } 1391 } 1392 } 1393 } 1394 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1395 vbs = bs; 1396 for (i=0;i<n_R/vbs;i++) { 1397 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1398 } 1399 } 1400 } 1401 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1402 if (sub_schurs->reuse_mumps) { 1403 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1404 1405 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1406 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1407 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1408 reuse_mumps->is_R = pcbddc->is_R_local; 1409 } else { 1410 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1411 } 1412 1413 /* print some info if requested */ 1414 if (pcbddc->dbg_flag) { 1415 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1416 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1420 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1421 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1422 } 1423 1424 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1425 if (!sub_schurs->reuse_mumps) { 1426 IS is_aux1,is_aux2; 1427 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1428 1429 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1430 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1431 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1432 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1433 for (i=0; i<n_D; i++) { 1434 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1435 } 1436 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1437 for (i=0, j=0; i<n_R; i++) { 1438 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1439 aux_array1[j++] = i; 1440 } 1441 } 1442 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1443 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1444 for (i=0, j=0; i<n_B; i++) { 1445 if (!PetscBTLookup(bitmask,is_indices[i])) { 1446 aux_array2[j++] = i; 1447 } 1448 } 1449 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1450 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1451 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1452 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1453 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1454 1455 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1456 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1457 for (i=0, j=0; i<n_R; i++) { 1458 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1459 aux_array1[j++] = i; 1460 } 1461 } 1462 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1463 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1464 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1465 } 1466 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1467 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1468 } else { 1469 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1470 IS tis; 1471 PetscInt schur_size; 1472 1473 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1474 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1475 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1476 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1477 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1478 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1479 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1480 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1481 } 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1489 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1490 { 1491 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1492 PC_IS *pcis = (PC_IS*)pc->data; 1493 PC pc_temp; 1494 Mat A_RR; 1495 MatReuse reuse; 1496 PetscScalar m_one = -1.0; 1497 PetscReal value; 1498 PetscInt n_D,n_R; 1499 PetscBool use_exact,use_exact_reduced,issbaij; 1500 PetscErrorCode ierr; 1501 /* prefixes stuff */ 1502 char dir_prefix[256],neu_prefix[256],str_level[16]; 1503 size_t len; 1504 1505 PetscFunctionBegin; 1506 1507 /* compute prefixes */ 1508 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1509 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1510 if (!pcbddc->current_level) { 1511 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1512 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1513 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1514 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1515 } else { 1516 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1517 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1518 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1519 len -= 15; /* remove "pc_bddc_coarse_" */ 1520 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1521 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1522 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1523 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1524 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1525 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1526 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1527 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1528 } 1529 1530 /* DIRICHLET PROBLEM */ 1531 if (dirichlet) { 1532 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1533 if (pcbddc->local_mat->symmetric_set) { 1534 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1535 } 1536 /* Matrix for Dirichlet problem is pcis->A_II */ 1537 n_D = pcis->n - pcis->n_B; 1538 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1539 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1540 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1541 /* default */ 1542 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1543 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1544 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1545 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1546 if (issbaij) { 1547 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1548 } else { 1549 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1550 } 1551 /* Allow user's customization */ 1552 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1553 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1554 } 1555 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1556 if (sub_schurs->reuse_mumps) { 1557 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1558 1559 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1560 } 1561 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1562 if (!n_D) { 1563 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1564 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1565 } 1566 /* Set Up KSP for Dirichlet problem of BDDC */ 1567 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1568 /* set ksp_D into pcis data */ 1569 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1570 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1571 pcis->ksp_D = pcbddc->ksp_D; 1572 } 1573 1574 /* NEUMANN PROBLEM */ 1575 A_RR = 0; 1576 if (neumann) { 1577 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1578 PetscInt ibs,mbs; 1579 PetscBool issbaij; 1580 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1581 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1582 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1583 if (pcbddc->ksp_R) { /* already created ksp */ 1584 PetscInt nn_R; 1585 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1586 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1587 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1588 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1589 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1590 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1591 reuse = MAT_INITIAL_MATRIX; 1592 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1593 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1594 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1595 reuse = MAT_INITIAL_MATRIX; 1596 } else { /* safe to reuse the matrix */ 1597 reuse = MAT_REUSE_MATRIX; 1598 } 1599 } 1600 /* last check */ 1601 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1602 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1603 reuse = MAT_INITIAL_MATRIX; 1604 } 1605 } else { /* first time, so we need to create the matrix */ 1606 reuse = MAT_INITIAL_MATRIX; 1607 } 1608 /* extract A_RR */ 1609 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1610 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1611 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1612 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1613 if (matis->A == pcbddc->local_mat) { 1614 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1615 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1616 } else { 1617 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1618 } 1619 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1620 if (matis->A == pcbddc->local_mat) { 1621 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1622 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1623 } else { 1624 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1625 } 1626 } 1627 if (!sub_schurs->reuse_mumps) { 1628 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1629 if (pcbddc->local_mat->symmetric_set) { 1630 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1631 } 1632 } else { 1633 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1634 1635 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1636 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1637 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1638 } 1639 if (!pcbddc->ksp_R) { /* create object if not present */ 1640 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1641 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1642 /* default */ 1643 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1644 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1645 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1646 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1647 if (issbaij) { 1648 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1649 } else { 1650 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1651 } 1652 /* Allow user's customization */ 1653 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1654 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1655 } 1656 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1657 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1658 if (!n_R) { 1659 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1660 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1661 } 1662 /* Reuse MUMPS solver if it is present */ 1663 if (sub_schurs->reuse_mumps) { 1664 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1665 1666 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1667 } 1668 /* Set Up KSP for Neumann problem of BDDC */ 1669 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1670 } 1671 /* free Neumann problem's matrix */ 1672 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1673 1674 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1675 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1676 if (pcbddc->dbg_flag) { 1677 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1678 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1679 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1680 } 1681 if (dirichlet) { /* Dirichlet */ 1682 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1683 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1684 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1685 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1686 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1687 /* need to be adapted? */ 1688 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1689 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1690 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1691 /* print info */ 1692 if (pcbddc->dbg_flag) { 1693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1694 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1695 } 1696 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1697 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1698 } 1699 } 1700 if (neumann) { /* Neumann */ 1701 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1702 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1703 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1704 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1705 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1706 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1707 /* need to be adapted? */ 1708 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1709 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1710 /* print info */ 1711 if (pcbddc->dbg_flag) { 1712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1713 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1714 } 1715 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1716 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1717 } 1718 } 1719 } 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1725 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1726 { 1727 PetscErrorCode ierr; 1728 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1729 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1730 1731 PetscFunctionBegin; 1732 if (!sub_schurs->reuse_mumps) { 1733 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1734 } 1735 if (!pcbddc->switch_static) { 1736 if (applytranspose && pcbddc->local_auxmat1) { 1737 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1738 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1739 } 1740 if (!sub_schurs->reuse_mumps) { 1741 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1742 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1743 } else { 1744 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1745 1746 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1747 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1748 } 1749 } else { 1750 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1751 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1752 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1753 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1754 if (applytranspose && pcbddc->local_auxmat1) { 1755 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1756 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1757 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1758 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1759 } 1760 } 1761 if (!sub_schurs->reuse_mumps) { 1762 if (applytranspose) { 1763 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1764 } else { 1765 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1766 } 1767 #if defined(PETSC_HAVE_MUMPS) 1768 } else { 1769 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1770 1771 if (applytranspose) { 1772 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1773 } else { 1774 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1775 } 1776 #endif 1777 } 1778 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1779 if (!pcbddc->switch_static) { 1780 if (!sub_schurs->reuse_mumps) { 1781 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1782 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1783 } else { 1784 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1785 1786 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1787 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1788 } 1789 if (!applytranspose && pcbddc->local_auxmat1) { 1790 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1791 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1792 } 1793 } else { 1794 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1795 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1796 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1797 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 if (!applytranspose && pcbddc->local_auxmat1) { 1799 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1800 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1801 } 1802 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1803 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1804 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1805 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 1810 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1813 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1814 { 1815 PetscErrorCode ierr; 1816 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1817 PC_IS* pcis = (PC_IS*) (pc->data); 1818 const PetscScalar zero = 0.0; 1819 1820 PetscFunctionBegin; 1821 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1822 if (applytranspose) { 1823 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1824 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1825 } else { 1826 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1827 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1828 } 1829 /* start communications from local primal nodes to rhs of coarse solver */ 1830 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1831 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1832 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1833 1834 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1835 /* TODO remove null space when doing multilevel */ 1836 if (pcbddc->coarse_ksp) { 1837 Vec rhs,sol; 1838 1839 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1840 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1841 if (applytranspose) { 1842 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1843 } else { 1844 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1845 } 1846 } 1847 1848 /* Local solution on R nodes */ 1849 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1850 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1851 } 1852 1853 /* communications from coarse sol to local primal nodes */ 1854 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1855 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1856 1857 /* Sum contributions from two levels */ 1858 if (applytranspose) { 1859 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1860 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1861 } else { 1862 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1863 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1864 } 1865 PetscFunctionReturn(0); 1866 } 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1870 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1871 { 1872 PetscErrorCode ierr; 1873 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1874 PetscScalar *array; 1875 Vec from,to; 1876 1877 PetscFunctionBegin; 1878 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1879 from = pcbddc->coarse_vec; 1880 to = pcbddc->vec1_P; 1881 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1882 Vec tvec; 1883 1884 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1885 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1886 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1887 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1888 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1889 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1890 } 1891 } else { /* from local to global -> put data in coarse right hand side */ 1892 from = pcbddc->vec1_P; 1893 to = pcbddc->coarse_vec; 1894 } 1895 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1901 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1902 { 1903 PetscErrorCode ierr; 1904 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1905 PetscScalar *array; 1906 Vec from,to; 1907 1908 PetscFunctionBegin; 1909 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1910 from = pcbddc->coarse_vec; 1911 to = pcbddc->vec1_P; 1912 } else { /* from local to global -> put data in coarse right hand side */ 1913 from = pcbddc->vec1_P; 1914 to = pcbddc->coarse_vec; 1915 } 1916 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1917 if (smode == SCATTER_FORWARD) { 1918 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1919 Vec tvec; 1920 1921 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1922 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1923 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1924 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1925 } 1926 } else { 1927 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1928 ierr = VecResetArray(from);CHKERRQ(ierr); 1929 } 1930 } 1931 PetscFunctionReturn(0); 1932 } 1933 1934 /* uncomment for testing purposes */ 1935 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1938 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1939 { 1940 PetscErrorCode ierr; 1941 PC_IS* pcis = (PC_IS*)(pc->data); 1942 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1943 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1944 /* one and zero */ 1945 PetscScalar one=1.0,zero=0.0; 1946 /* space to store constraints and their local indices */ 1947 PetscScalar *constraints_data; 1948 PetscInt *constraints_idxs,*constraints_idxs_B; 1949 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1950 PetscInt *constraints_n; 1951 /* iterators */ 1952 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1953 /* BLAS integers */ 1954 PetscBLASInt lwork,lierr; 1955 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1956 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1957 /* reuse */ 1958 PetscInt olocal_primal_size,olocal_primal_size_cc; 1959 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1960 /* change of basis */ 1961 PetscBool qr_needed; 1962 PetscBT change_basis,qr_needed_idx; 1963 /* auxiliary stuff */ 1964 PetscInt *nnz,*is_indices; 1965 PetscInt ncc; 1966 /* some quantities */ 1967 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1968 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1969 1970 PetscFunctionBegin; 1971 /* Destroy Mat objects computed previously */ 1972 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1973 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1974 /* save info on constraints from previous setup (if any) */ 1975 olocal_primal_size = pcbddc->local_primal_size; 1976 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1977 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1978 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1979 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1980 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1981 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1982 1983 /* print some info */ 1984 if (pcbddc->dbg_flag) { 1985 IS vertices; 1986 PetscInt nv,nedges,nfaces; 1987 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1988 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1989 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1990 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1995 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1996 } 1997 1998 if (!pcbddc->adaptive_selection) { 1999 IS ISForVertices,*ISForFaces,*ISForEdges; 2000 MatNullSpace nearnullsp; 2001 const Vec *nearnullvecs; 2002 Vec *localnearnullsp; 2003 PetscScalar *array; 2004 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2005 PetscBool nnsp_has_cnst; 2006 /* LAPACK working arrays for SVD or POD */ 2007 PetscBool skip_lapack,boolforchange; 2008 PetscScalar *work; 2009 PetscReal *singular_vals; 2010 #if defined(PETSC_USE_COMPLEX) 2011 PetscReal *rwork; 2012 #endif 2013 #if defined(PETSC_MISSING_LAPACK_GESVD) 2014 PetscScalar *temp_basis,*correlation_mat; 2015 #else 2016 PetscBLASInt dummy_int=1; 2017 PetscScalar dummy_scalar=1.; 2018 #endif 2019 2020 /* Get index sets for faces, edges and vertices from graph */ 2021 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2022 /* free unneeded index sets */ 2023 if (!pcbddc->use_vertices) { 2024 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2025 } 2026 if (!pcbddc->use_edges) { 2027 for (i=0;i<n_ISForEdges;i++) { 2028 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2029 } 2030 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2031 n_ISForEdges = 0; 2032 } 2033 if (!pcbddc->use_faces) { 2034 for (i=0;i<n_ISForFaces;i++) { 2035 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2036 } 2037 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2038 n_ISForFaces = 0; 2039 } 2040 2041 #if defined(PETSC_USE_DEBUG) 2042 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2043 Also use_change_of_basis should be consistent among processors */ 2044 if (pcbddc->NullSpace) { 2045 PetscBool tbool[2],gbool[2]; 2046 2047 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2048 pcbddc->use_change_of_basis = PETSC_TRUE; 2049 if (!ISForEdges) { 2050 pcbddc->use_change_on_faces = PETSC_TRUE; 2051 } 2052 } 2053 tbool[0] = pcbddc->use_change_of_basis; 2054 tbool[1] = pcbddc->use_change_on_faces; 2055 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2056 pcbddc->use_change_of_basis = gbool[0]; 2057 pcbddc->use_change_on_faces = gbool[1]; 2058 } 2059 #endif 2060 2061 /* check if near null space is attached to global mat */ 2062 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2063 if (nearnullsp) { 2064 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2065 /* remove any stored info */ 2066 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2067 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2068 /* store information for BDDC solver reuse */ 2069 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2070 pcbddc->onearnullspace = nearnullsp; 2071 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2072 for (i=0;i<nnsp_size;i++) { 2073 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2074 } 2075 } else { /* if near null space is not provided BDDC uses constants by default */ 2076 nnsp_size = 0; 2077 nnsp_has_cnst = PETSC_TRUE; 2078 } 2079 /* get max number of constraints on a single cc */ 2080 max_constraints = nnsp_size; 2081 if (nnsp_has_cnst) max_constraints++; 2082 2083 /* 2084 Evaluate maximum storage size needed by the procedure 2085 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2086 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2087 There can be multiple constraints per connected component 2088 */ 2089 n_vertices = 0; 2090 if (ISForVertices) { 2091 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2092 } 2093 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2094 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2095 2096 total_counts = n_ISForFaces+n_ISForEdges; 2097 total_counts *= max_constraints; 2098 total_counts += n_vertices; 2099 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2100 2101 total_counts = 0; 2102 max_size_of_constraint = 0; 2103 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2104 IS used_is; 2105 if (i<n_ISForEdges) { 2106 used_is = ISForEdges[i]; 2107 } else { 2108 used_is = ISForFaces[i-n_ISForEdges]; 2109 } 2110 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2111 total_counts += j; 2112 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2113 } 2114 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2115 2116 /* get local part of global near null space vectors */ 2117 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2118 for (k=0;k<nnsp_size;k++) { 2119 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2120 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2121 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2122 } 2123 2124 /* whether or not to skip lapack calls */ 2125 skip_lapack = PETSC_TRUE; 2126 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2127 2128 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2129 if (!skip_lapack) { 2130 PetscScalar temp_work; 2131 2132 #if defined(PETSC_MISSING_LAPACK_GESVD) 2133 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2134 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2135 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2136 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2137 #if defined(PETSC_USE_COMPLEX) 2138 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2139 #endif 2140 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2141 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2142 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2143 lwork = -1; 2144 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2145 #if !defined(PETSC_USE_COMPLEX) 2146 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2147 #else 2148 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2149 #endif 2150 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2151 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2152 #else /* on missing GESVD */ 2153 /* SVD */ 2154 PetscInt max_n,min_n; 2155 max_n = max_size_of_constraint; 2156 min_n = max_constraints; 2157 if (max_size_of_constraint < max_constraints) { 2158 min_n = max_size_of_constraint; 2159 max_n = max_constraints; 2160 } 2161 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2162 #if defined(PETSC_USE_COMPLEX) 2163 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2164 #endif 2165 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2166 lwork = -1; 2167 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2168 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2169 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2170 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2171 #if !defined(PETSC_USE_COMPLEX) 2172 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2173 #else 2174 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2175 #endif 2176 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2177 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2178 #endif /* on missing GESVD */ 2179 /* Allocate optimal workspace */ 2180 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2181 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2182 } 2183 /* Now we can loop on constraining sets */ 2184 total_counts = 0; 2185 constraints_idxs_ptr[0] = 0; 2186 constraints_data_ptr[0] = 0; 2187 /* vertices */ 2188 if (n_vertices) { 2189 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2190 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2191 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2192 for (i=0;i<n_vertices;i++) { 2193 constraints_n[total_counts] = 1; 2194 constraints_data[total_counts] = 1.0; 2195 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2196 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2197 total_counts++; 2198 } 2199 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2200 PetscBool used_vertex; 2201 for (i=0;i<n_vertices;i++) { 2202 used_vertex = PETSC_FALSE; 2203 k = 0; 2204 while (!used_vertex && k<nnsp_size) { 2205 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2206 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2207 constraints_n[total_counts] = 1; 2208 constraints_idxs[total_counts] = is_indices[i]; 2209 constraints_data[total_counts] = 1.0; 2210 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2211 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2212 total_counts++; 2213 used_vertex = PETSC_TRUE; 2214 } 2215 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2216 k++; 2217 } 2218 } 2219 } 2220 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2221 n_vertices = total_counts; 2222 } 2223 2224 /* edges and faces */ 2225 total_counts_cc = total_counts; 2226 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2227 IS used_is; 2228 PetscBool idxs_copied = PETSC_FALSE; 2229 2230 if (ncc<n_ISForEdges) { 2231 used_is = ISForEdges[ncc]; 2232 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2233 } else { 2234 used_is = ISForFaces[ncc-n_ISForEdges]; 2235 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2236 } 2237 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2238 2239 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2240 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2241 /* change of basis should not be performed on local periodic nodes */ 2242 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2243 if (nnsp_has_cnst) { 2244 PetscScalar quad_value; 2245 2246 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2247 idxs_copied = PETSC_TRUE; 2248 2249 if (!pcbddc->use_nnsp_true) { 2250 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2251 } else { 2252 quad_value = 1.0; 2253 } 2254 for (j=0;j<size_of_constraint;j++) { 2255 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2256 } 2257 temp_constraints++; 2258 total_counts++; 2259 } 2260 for (k=0;k<nnsp_size;k++) { 2261 PetscReal real_value; 2262 PetscScalar *ptr_to_data; 2263 2264 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2265 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2266 for (j=0;j<size_of_constraint;j++) { 2267 ptr_to_data[j] = array[is_indices[j]]; 2268 } 2269 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2270 /* check if array is null on the connected component */ 2271 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2272 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2273 if (real_value > 0.0) { /* keep indices and values */ 2274 temp_constraints++; 2275 total_counts++; 2276 if (!idxs_copied) { 2277 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2278 idxs_copied = PETSC_TRUE; 2279 } 2280 } 2281 } 2282 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2283 valid_constraints = temp_constraints; 2284 if (!pcbddc->use_nnsp_true && temp_constraints) { 2285 if (temp_constraints == 1) { /* just normalize the constraint */ 2286 PetscScalar norm,*ptr_to_data; 2287 2288 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2289 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2290 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2291 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2292 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2293 } else { /* perform SVD */ 2294 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2295 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2296 2297 #if defined(PETSC_MISSING_LAPACK_GESVD) 2298 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2299 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2300 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2301 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2302 from that computed using LAPACKgesvd 2303 -> This is due to a different computation of eigenvectors in LAPACKheev 2304 -> The quality of the POD-computed basis will be the same */ 2305 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2306 /* Store upper triangular part of correlation matrix */ 2307 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2308 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2309 for (j=0;j<temp_constraints;j++) { 2310 for (k=0;k<j+1;k++) { 2311 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2312 } 2313 } 2314 /* compute eigenvalues and eigenvectors of correlation matrix */ 2315 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2316 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2317 #if !defined(PETSC_USE_COMPLEX) 2318 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2319 #else 2320 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2321 #endif 2322 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2323 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2324 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2325 j = 0; 2326 while (j < temp_constraints && singular_vals[j] < tol) j++; 2327 total_counts = total_counts-j; 2328 valid_constraints = temp_constraints-j; 2329 /* scale and copy POD basis into used quadrature memory */ 2330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2331 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2332 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2333 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2334 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2335 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2336 if (j<temp_constraints) { 2337 PetscInt ii; 2338 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2339 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2340 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2341 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2342 for (k=0;k<temp_constraints-j;k++) { 2343 for (ii=0;ii<size_of_constraint;ii++) { 2344 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2345 } 2346 } 2347 } 2348 #else /* on missing GESVD */ 2349 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2350 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2351 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2352 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2353 #if !defined(PETSC_USE_COMPLEX) 2354 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2355 #else 2356 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2357 #endif 2358 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2359 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2360 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2361 k = temp_constraints; 2362 if (k > size_of_constraint) k = size_of_constraint; 2363 j = 0; 2364 while (j < k && singular_vals[k-j-1] < tol) j++; 2365 valid_constraints = k-j; 2366 total_counts = total_counts-temp_constraints+valid_constraints; 2367 #endif /* on missing GESVD */ 2368 } 2369 } 2370 /* update pointers information */ 2371 if (valid_constraints) { 2372 constraints_n[total_counts_cc] = valid_constraints; 2373 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2374 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2375 /* set change_of_basis flag */ 2376 if (boolforchange) { 2377 PetscBTSet(change_basis,total_counts_cc); 2378 } 2379 total_counts_cc++; 2380 } 2381 } 2382 /* free workspace */ 2383 if (!skip_lapack) { 2384 ierr = PetscFree(work);CHKERRQ(ierr); 2385 #if defined(PETSC_USE_COMPLEX) 2386 ierr = PetscFree(rwork);CHKERRQ(ierr); 2387 #endif 2388 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2389 #if defined(PETSC_MISSING_LAPACK_GESVD) 2390 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2391 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2392 #endif 2393 } 2394 for (k=0;k<nnsp_size;k++) { 2395 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2396 } 2397 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2398 /* free index sets of faces, edges and vertices */ 2399 for (i=0;i<n_ISForFaces;i++) { 2400 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2401 } 2402 if (n_ISForFaces) { 2403 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2404 } 2405 for (i=0;i<n_ISForEdges;i++) { 2406 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2407 } 2408 if (n_ISForEdges) { 2409 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2410 } 2411 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2412 } else { 2413 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2414 2415 total_counts = 0; 2416 n_vertices = 0; 2417 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2418 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2419 } 2420 max_constraints = 0; 2421 total_counts_cc = 0; 2422 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2423 total_counts += pcbddc->adaptive_constraints_n[i]; 2424 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2425 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2426 } 2427 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2428 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2429 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2430 constraints_data = pcbddc->adaptive_constraints_data; 2431 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2432 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2433 total_counts_cc = 0; 2434 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2435 if (pcbddc->adaptive_constraints_n[i]) { 2436 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2437 } 2438 } 2439 #if 0 2440 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2441 for (i=0;i<total_counts_cc;i++) { 2442 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2443 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2444 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2445 printf(" %d",constraints_idxs[j]); 2446 } 2447 printf("\n"); 2448 printf("number of cc: %d\n",constraints_n[i]); 2449 } 2450 for (i=0;i<n_vertices;i++) { 2451 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2452 } 2453 for (i=0;i<sub_schurs->n_subs;i++) { 2454 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2455 } 2456 #endif 2457 2458 max_size_of_constraint = 0; 2459 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2460 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2461 /* Change of basis */ 2462 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2463 if (pcbddc->use_change_of_basis) { 2464 for (i=0;i<sub_schurs->n_subs;i++) { 2465 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2466 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2467 } 2468 } 2469 } 2470 } 2471 pcbddc->local_primal_size = total_counts; 2472 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2473 2474 /* map constraints_idxs in boundary numbering */ 2475 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2476 if (i != constraints_idxs_ptr[total_counts_cc]) { 2477 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2478 } 2479 2480 /* Create constraint matrix */ 2481 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2482 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2483 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2484 2485 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2486 /* determine if a QR strategy is needed for change of basis */ 2487 qr_needed = PETSC_FALSE; 2488 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2489 total_primal_vertices=0; 2490 pcbddc->local_primal_size_cc = 0; 2491 for (i=0;i<total_counts_cc;i++) { 2492 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2493 if (size_of_constraint == 1) { 2494 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2495 pcbddc->local_primal_size_cc += 1; 2496 } else if (PetscBTLookup(change_basis,i)) { 2497 for (k=0;k<constraints_n[i];k++) { 2498 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2499 } 2500 pcbddc->local_primal_size_cc += constraints_n[i]; 2501 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2502 PetscBTSet(qr_needed_idx,i); 2503 qr_needed = PETSC_TRUE; 2504 } 2505 } else { 2506 pcbddc->local_primal_size_cc += 1; 2507 } 2508 } 2509 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2510 pcbddc->n_vertices = total_primal_vertices; 2511 /* permute indices in order to have a sorted set of vertices */ 2512 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2513 2514 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2515 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2516 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2517 2518 /* nonzero structure of constraint matrix */ 2519 /* and get reference dof for local constraints */ 2520 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2521 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2522 2523 j = total_primal_vertices; 2524 total_counts = total_primal_vertices; 2525 cum = total_primal_vertices; 2526 for (i=n_vertices;i<total_counts_cc;i++) { 2527 if (!PetscBTLookup(change_basis,i)) { 2528 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2529 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2530 cum++; 2531 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2532 for (k=0;k<constraints_n[i];k++) { 2533 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2534 nnz[j+k] = size_of_constraint; 2535 } 2536 j += constraints_n[i]; 2537 } 2538 } 2539 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2540 ierr = PetscFree(nnz);CHKERRQ(ierr); 2541 2542 /* set values in constraint matrix */ 2543 for (i=0;i<total_primal_vertices;i++) { 2544 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2545 } 2546 total_counts = total_primal_vertices; 2547 for (i=n_vertices;i<total_counts_cc;i++) { 2548 if (!PetscBTLookup(change_basis,i)) { 2549 PetscInt *cols; 2550 2551 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2552 cols = constraints_idxs+constraints_idxs_ptr[i]; 2553 for (k=0;k<constraints_n[i];k++) { 2554 PetscInt row = total_counts+k; 2555 PetscScalar *vals; 2556 2557 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2558 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2559 } 2560 total_counts += constraints_n[i]; 2561 } 2562 } 2563 /* assembling */ 2564 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2565 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2566 2567 /* 2568 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2569 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2570 */ 2571 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2572 if (pcbddc->use_change_of_basis) { 2573 /* dual and primal dofs on a single cc */ 2574 PetscInt dual_dofs,primal_dofs; 2575 /* working stuff for GEQRF */ 2576 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2577 PetscBLASInt lqr_work; 2578 /* working stuff for UNGQR */ 2579 PetscScalar *gqr_work,lgqr_work_t; 2580 PetscBLASInt lgqr_work; 2581 /* working stuff for TRTRS */ 2582 PetscScalar *trs_rhs; 2583 PetscBLASInt Blas_NRHS; 2584 /* pointers for values insertion into change of basis matrix */ 2585 PetscInt *start_rows,*start_cols; 2586 PetscScalar *start_vals; 2587 /* working stuff for values insertion */ 2588 PetscBT is_primal; 2589 PetscInt *aux_primal_numbering_B; 2590 /* matrix sizes */ 2591 PetscInt global_size,local_size; 2592 /* temporary change of basis */ 2593 Mat localChangeOfBasisMatrix; 2594 /* extra space for debugging */ 2595 PetscScalar *dbg_work; 2596 2597 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2598 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2599 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2600 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2601 /* nonzeros for local mat */ 2602 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2603 for (i=0;i<pcis->n;i++) nnz[i]=1; 2604 for (i=n_vertices;i<total_counts_cc;i++) { 2605 if (PetscBTLookup(change_basis,i)) { 2606 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2607 if (PetscBTLookup(qr_needed_idx,i)) { 2608 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2609 } else { 2610 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2611 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2612 } 2613 } 2614 } 2615 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2616 ierr = PetscFree(nnz);CHKERRQ(ierr); 2617 /* Set initial identity in the matrix */ 2618 for (i=0;i<pcis->n;i++) { 2619 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2620 } 2621 2622 if (pcbddc->dbg_flag) { 2623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2625 } 2626 2627 2628 /* Now we loop on the constraints which need a change of basis */ 2629 /* 2630 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2631 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2632 2633 Basic blocks of change of basis matrix T computed by 2634 2635 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2636 2637 | 1 0 ... 0 s_1/S | 2638 | 0 1 ... 0 s_2/S | 2639 | ... | 2640 | 0 ... 1 s_{n-1}/S | 2641 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2642 2643 with S = \sum_{i=1}^n s_i^2 2644 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2645 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2646 2647 - QR decomposition of constraints otherwise 2648 */ 2649 if (qr_needed) { 2650 /* space to store Q */ 2651 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2652 /* first we issue queries for optimal work */ 2653 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2654 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2655 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2656 lqr_work = -1; 2657 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2658 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2659 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2660 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2661 lgqr_work = -1; 2662 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2663 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2664 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2665 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2666 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2667 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2668 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2669 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2670 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2671 /* array to store scaling factors for reflectors */ 2672 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2673 /* array to store rhs and solution of triangular solver */ 2674 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2675 /* allocating workspace for check */ 2676 if (pcbddc->dbg_flag) { 2677 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2678 } 2679 } 2680 /* array to store whether a node is primal or not */ 2681 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2682 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2683 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2684 if (i != total_primal_vertices) { 2685 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2686 } 2687 for (i=0;i<total_primal_vertices;i++) { 2688 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2689 } 2690 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2691 2692 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2693 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2694 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2695 if (PetscBTLookup(change_basis,total_counts)) { 2696 /* get constraint info */ 2697 primal_dofs = constraints_n[total_counts]; 2698 dual_dofs = size_of_constraint-primal_dofs; 2699 2700 if (pcbddc->dbg_flag) { 2701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2702 } 2703 2704 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2705 2706 /* copy quadrature constraints for change of basis check */ 2707 if (pcbddc->dbg_flag) { 2708 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2709 } 2710 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2711 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2712 2713 /* compute QR decomposition of constraints */ 2714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2715 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2716 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2717 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2718 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2719 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2720 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2721 2722 /* explictly compute R^-T */ 2723 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2724 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2725 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2726 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2727 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2728 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2729 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2730 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2731 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2732 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2733 2734 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2735 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2736 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2737 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2738 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2739 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2740 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2741 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2742 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2743 2744 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2745 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2746 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2747 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2748 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2749 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2750 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2751 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2752 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2753 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2754 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2755 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2756 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2757 2758 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2759 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2760 /* insert cols for primal dofs */ 2761 for (j=0;j<primal_dofs;j++) { 2762 start_vals = &qr_basis[j*size_of_constraint]; 2763 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2764 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2765 } 2766 /* insert cols for dual dofs */ 2767 for (j=0,k=0;j<dual_dofs;k++) { 2768 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2769 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2770 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2771 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2772 j++; 2773 } 2774 } 2775 2776 /* check change of basis */ 2777 if (pcbddc->dbg_flag) { 2778 PetscInt ii,jj; 2779 PetscBool valid_qr=PETSC_TRUE; 2780 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2782 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2783 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2784 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2785 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2786 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2787 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2788 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2789 for (jj=0;jj<size_of_constraint;jj++) { 2790 for (ii=0;ii<primal_dofs;ii++) { 2791 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2792 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2793 } 2794 } 2795 if (!valid_qr) { 2796 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2797 for (jj=0;jj<size_of_constraint;jj++) { 2798 for (ii=0;ii<primal_dofs;ii++) { 2799 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2800 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2801 } 2802 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2803 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2804 } 2805 } 2806 } 2807 } else { 2808 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2809 } 2810 } 2811 } else { /* simple transformation block */ 2812 PetscInt row,col; 2813 PetscScalar val,norm; 2814 2815 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2816 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2817 for (j=0;j<size_of_constraint;j++) { 2818 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2819 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2820 if (!PetscBTLookup(is_primal,row_B)) { 2821 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2822 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2823 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2824 } else { 2825 for (k=0;k<size_of_constraint;k++) { 2826 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2827 if (row != col) { 2828 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2829 } else { 2830 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2831 } 2832 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2833 } 2834 } 2835 } 2836 if (pcbddc->dbg_flag) { 2837 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2838 } 2839 } 2840 } else { 2841 if (pcbddc->dbg_flag) { 2842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2843 } 2844 } 2845 } 2846 2847 /* free workspace */ 2848 if (qr_needed) { 2849 if (pcbddc->dbg_flag) { 2850 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2851 } 2852 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2853 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2854 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2855 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2856 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2857 } 2858 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2859 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2860 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 2862 /* assembling of global change of variable */ 2863 { 2864 Mat tmat; 2865 PetscInt bs; 2866 2867 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2868 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2869 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2870 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2871 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2872 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2873 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2874 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2875 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2876 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2877 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2878 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2879 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2880 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2881 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2882 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2883 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2884 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2885 } 2886 /* check */ 2887 if (pcbddc->dbg_flag) { 2888 PetscReal error; 2889 Vec x,x_change; 2890 2891 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2892 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2893 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2894 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2895 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2896 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2897 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2898 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2899 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2900 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2901 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2902 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2903 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2904 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2905 ierr = VecDestroy(&x);CHKERRQ(ierr); 2906 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2907 } 2908 2909 /* adapt sub_schurs computed (if any) */ 2910 if (pcbddc->use_deluxe_scaling) { 2911 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2912 if (sub_schurs->S_Ej_all) { 2913 Mat S_new,tmat; 2914 IS is_all_N; 2915 2916 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2917 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2918 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2919 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2920 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2921 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2922 sub_schurs->S_Ej_all = S_new; 2923 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2924 if (sub_schurs->sum_S_Ej_all) { 2925 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2926 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2927 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2928 sub_schurs->sum_S_Ej_all = S_new; 2929 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2930 } 2931 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2932 } 2933 } 2934 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2935 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2936 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2937 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2938 } 2939 2940 /* set up change of basis context */ 2941 if (pcbddc->ChangeOfBasisMatrix) { 2942 PCBDDCChange_ctx change_ctx; 2943 2944 if (!pcbddc->new_global_mat) { 2945 PetscInt global_size,local_size; 2946 2947 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2948 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2949 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2950 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2951 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2952 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2953 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2954 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2955 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2956 } else { 2957 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2958 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2959 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2960 } 2961 if (!pcbddc->user_ChangeOfBasisMatrix) { 2962 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2963 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2964 } else { 2965 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2966 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2967 } 2968 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2969 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2970 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2971 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 } 2973 2974 /* check if a new primal space has been introduced */ 2975 pcbddc->new_primal_space_local = PETSC_TRUE; 2976 if (olocal_primal_size == pcbddc->local_primal_size) { 2977 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2978 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2979 if (!pcbddc->new_primal_space_local) { 2980 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2981 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2982 } 2983 } 2984 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2985 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2986 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2987 2988 /* flush dbg viewer */ 2989 if (pcbddc->dbg_flag) { 2990 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2991 } 2992 2993 /* free workspace */ 2994 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2995 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2996 if (!pcbddc->adaptive_selection) { 2997 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2998 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2999 } else { 3000 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3001 pcbddc->adaptive_constraints_idxs_ptr, 3002 pcbddc->adaptive_constraints_data_ptr, 3003 pcbddc->adaptive_constraints_idxs, 3004 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3005 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3006 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3007 } 3008 PetscFunctionReturn(0); 3009 } 3010 3011 #undef __FUNCT__ 3012 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3013 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3014 { 3015 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3016 PC_IS *pcis = (PC_IS*)pc->data; 3017 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3018 PetscInt ierr,i,vertex_size,N; 3019 PetscViewer viewer=pcbddc->dbg_viewer; 3020 3021 PetscFunctionBegin; 3022 /* Reset previously computed graph */ 3023 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3024 /* Init local Graph struct */ 3025 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3026 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3027 3028 /* Check validity of the csr graph passed in by the user */ 3029 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3030 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3031 } 3032 3033 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3034 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3035 PetscInt *xadj,*adjncy; 3036 PetscInt nvtxs; 3037 PetscBool flg_row=PETSC_FALSE; 3038 3039 if (pcbddc->use_local_adj) { 3040 3041 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3042 if (flg_row) { 3043 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3044 pcbddc->computed_rowadj = PETSC_TRUE; 3045 } 3046 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3047 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3048 IS is_dummy; 3049 ISLocalToGlobalMapping l2gmap_dummy; 3050 PetscInt j,sum; 3051 PetscInt *cxadj,*cadjncy; 3052 const PetscInt *idxs; 3053 PCBDDCGraph graph; 3054 PetscBT is_on_boundary; 3055 3056 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3057 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3058 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3059 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3060 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3061 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3062 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3063 if (flg_row) { 3064 graph->xadj = xadj; 3065 graph->adjncy = adjncy; 3066 } 3067 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3068 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3069 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3070 3071 if (pcbddc->dbg_flag) { 3072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3073 for (i=0;i<graph->ncc;i++) { 3074 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3075 } 3076 } 3077 3078 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3079 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3080 for (i=0;i<pcis->n_B;i++) { 3081 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3082 } 3083 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3084 3085 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3086 sum = 0; 3087 for (i=0;i<graph->ncc;i++) { 3088 PetscInt sizecc = 0; 3089 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3090 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3091 sizecc++; 3092 } 3093 } 3094 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3095 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3096 cxadj[graph->queue[j]] = sizecc; 3097 } 3098 } 3099 sum += sizecc*sizecc; 3100 } 3101 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3102 sum = 0; 3103 for (i=0;i<pcis->n;i++) { 3104 PetscInt temp = cxadj[i]; 3105 cxadj[i] = sum; 3106 sum += temp; 3107 } 3108 cxadj[pcis->n] = sum; 3109 for (i=0;i<graph->ncc;i++) { 3110 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3111 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3112 PetscInt k,sizecc = 0; 3113 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3114 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3115 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3116 sizecc++; 3117 } 3118 } 3119 } 3120 } 3121 } 3122 if (sum) { 3123 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3124 } else { 3125 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3126 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3127 } 3128 graph->xadj = 0; 3129 graph->adjncy = 0; 3130 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3131 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3132 } 3133 } 3134 if (pcbddc->dbg_flag) { 3135 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3136 } 3137 3138 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3139 vertex_size = 1; 3140 if (pcbddc->user_provided_isfordofs) { 3141 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3142 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3143 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3144 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3145 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3146 } 3147 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3148 pcbddc->n_ISForDofs = 0; 3149 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3150 } 3151 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3152 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3153 } else { 3154 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3155 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3156 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3157 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3158 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3159 } 3160 } 3161 } 3162 3163 /* Setup of Graph */ 3164 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3165 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3166 } 3167 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3168 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3169 } 3170 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3171 3172 /* Graph's connected components analysis */ 3173 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3174 3175 /* print some info to stdout */ 3176 if (pcbddc->dbg_flag) { 3177 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3178 } 3179 3180 /* mark topography has done */ 3181 pcbddc->recompute_topography = PETSC_FALSE; 3182 PetscFunctionReturn(0); 3183 } 3184 3185 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3186 #undef __FUNCT__ 3187 #define __FUNCT__ "PCBDDCSubsetNumbering" 3188 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3189 { 3190 PetscSF sf; 3191 PetscLayout map; 3192 const PetscInt *idxs; 3193 PetscInt *leaf_data,*root_data,*gidxs; 3194 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3195 PetscInt n_n,nlocals,start,first_index; 3196 PetscMPIInt commsize; 3197 PetscBool first_found; 3198 PetscErrorCode ierr; 3199 3200 PetscFunctionBegin; 3201 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3202 if (subset_mult) { 3203 PetscCheckSameComm(subset,1,subset_mult,2); 3204 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3205 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3206 } 3207 /* create workspace layout for computing global indices of subset */ 3208 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3209 lbounds[0] = lbounds[1] = 0; 3210 for (i=0;i<n;i++) { 3211 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3212 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3213 } 3214 lbounds[0] = -lbounds[0]; 3215 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3216 gbounds[0] = -gbounds[0]; 3217 N = gbounds[1] - gbounds[0] + 1; 3218 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3219 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3220 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3221 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3222 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3223 3224 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3225 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3226 if (subset_mult) { 3227 const PetscInt* idxs_mult; 3228 3229 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3230 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3231 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3232 } else { 3233 for (i=0;i<n;i++) leaf_data[i] = 1; 3234 } 3235 /* local size of new subset */ 3236 n_n = 0; 3237 for (i=0;i<n;i++) n_n += leaf_data[i]; 3238 3239 /* global indexes in layout */ 3240 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3241 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3242 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3243 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3244 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3245 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3246 3247 /* reduce from leaves to roots */ 3248 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3249 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3250 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3251 3252 /* count indexes in local part of layout */ 3253 nlocals = 0; 3254 first_index = -1; 3255 first_found = PETSC_FALSE; 3256 for (i=0;i<Nl;i++) { 3257 if (!first_found && root_data[i]) { 3258 first_found = PETSC_TRUE; 3259 first_index = i; 3260 } 3261 nlocals += root_data[i]; 3262 } 3263 3264 /* cumulative of number of indexes and size of subset without holes */ 3265 #if defined(PETSC_HAVE_MPI_EXSCAN) 3266 start = 0; 3267 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3268 #else 3269 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3270 start = start-nlocals; 3271 #endif 3272 3273 if (N_n) { /* compute total size of new subset if requested */ 3274 *N_n = start + nlocals; 3275 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3276 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3277 } 3278 3279 /* adapt root data with cumulative */ 3280 if (first_found) { 3281 PetscInt old_index; 3282 3283 root_data[first_index] += start; 3284 old_index = first_index; 3285 for (i=first_index+1;i<Nl;i++) { 3286 if (root_data[i]) { 3287 root_data[i] += root_data[old_index]; 3288 old_index = i; 3289 } 3290 } 3291 } 3292 3293 /* from roots to leaves */ 3294 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3295 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3296 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3297 3298 /* create new IS with global indexes without holes */ 3299 if (subset_mult) { 3300 const PetscInt* idxs_mult; 3301 PetscInt cum; 3302 3303 cum = 0; 3304 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3305 for (i=0;i<n;i++) { 3306 PetscInt j; 3307 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3308 } 3309 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3310 } else { 3311 for (i=0;i<n;i++) { 3312 gidxs[i] = leaf_data[i]-1; 3313 } 3314 } 3315 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3316 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3317 PetscFunctionReturn(0); 3318 } 3319 3320 #undef __FUNCT__ 3321 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3322 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3323 { 3324 PetscInt i,j; 3325 PetscScalar *alphas; 3326 PetscErrorCode ierr; 3327 3328 PetscFunctionBegin; 3329 /* this implements stabilized Gram-Schmidt */ 3330 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3331 for (i=0;i<n;i++) { 3332 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3333 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3334 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3335 } 3336 ierr = PetscFree(alphas);CHKERRQ(ierr); 3337 PetscFunctionReturn(0); 3338 } 3339 3340 #undef __FUNCT__ 3341 #define __FUNCT__ "MatISGetSubassemblingPattern" 3342 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3343 { 3344 IS ranks_send_to; 3345 PetscInt n_neighs,*neighs,*n_shared,**shared; 3346 PetscMPIInt size,rank,color; 3347 PetscInt *xadj,*adjncy; 3348 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3349 PetscInt i,local_size,threshold=0; 3350 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3351 PetscSubcomm subcomm; 3352 PetscErrorCode ierr; 3353 3354 PetscFunctionBegin; 3355 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3356 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3357 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3358 3359 /* Get info on mapping */ 3360 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3361 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3362 3363 /* build local CSR graph of subdomains' connectivity */ 3364 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3365 xadj[0] = 0; 3366 xadj[1] = PetscMax(n_neighs-1,0); 3367 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3368 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3369 3370 if (threshold) { 3371 PetscInt xadj_count = 0; 3372 for (i=1;i<n_neighs;i++) { 3373 if (n_shared[i] > threshold) { 3374 adjncy[xadj_count] = neighs[i]; 3375 adjncy_wgt[xadj_count] = n_shared[i]; 3376 xadj_count++; 3377 } 3378 } 3379 xadj[1] = xadj_count; 3380 } else { 3381 if (xadj[1]) { 3382 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3383 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3384 } 3385 } 3386 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3387 if (use_square) { 3388 for (i=0;i<xadj[1];i++) { 3389 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3390 } 3391 } 3392 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3393 3394 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3395 3396 /* 3397 Restrict work on active processes only. 3398 */ 3399 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3400 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3401 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3402 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3403 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3404 if (color) { 3405 ierr = PetscFree(xadj);CHKERRQ(ierr); 3406 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3407 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3408 } else { 3409 Mat subdomain_adj; 3410 IS new_ranks,new_ranks_contig; 3411 MatPartitioning partitioner; 3412 PetscInt prank,rstart=0,rend=0; 3413 PetscInt *is_indices,*oldranks; 3414 PetscBool aggregate; 3415 3416 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3417 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3418 prank = rank; 3419 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3420 /* 3421 for (i=0;i<size;i++) { 3422 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3423 } 3424 */ 3425 for (i=0;i<xadj[1];i++) { 3426 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3427 } 3428 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3429 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3430 if (aggregate) { 3431 PetscInt lrows,row,ncols,*cols; 3432 PetscMPIInt nrank; 3433 PetscScalar *vals; 3434 3435 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3436 lrows = 0; 3437 if (nrank<redprocs) { 3438 lrows = size/redprocs; 3439 if (nrank<size%redprocs) lrows++; 3440 } 3441 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3442 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3443 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3444 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3445 row = nrank; 3446 ncols = xadj[1]-xadj[0]; 3447 cols = adjncy; 3448 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3449 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3450 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3451 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3452 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3453 ierr = PetscFree(xadj);CHKERRQ(ierr); 3454 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3455 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3456 ierr = PetscFree(vals);CHKERRQ(ierr); 3457 } else { 3458 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3459 } 3460 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3461 3462 /* Partition */ 3463 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3464 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3465 if (use_vwgt) { 3466 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3467 v_wgt[0] = local_size; 3468 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3469 } 3470 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3471 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3472 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3473 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3474 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3475 3476 /* renumber new_ranks to avoid "holes" in new set of processors */ 3477 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3478 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3479 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3480 if (!redprocs) { 3481 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3482 } else { 3483 PetscInt idxs[1]; 3484 PetscMPIInt tag; 3485 MPI_Request *reqs; 3486 3487 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3488 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3489 for (i=rstart;i<rend;i++) { 3490 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3491 } 3492 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3493 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3494 ierr = PetscFree(reqs);CHKERRQ(ierr); 3495 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3496 } 3497 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3498 /* clean up */ 3499 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3500 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3501 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3502 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3503 } 3504 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3505 3506 /* assemble parallel IS for sends */ 3507 i = 1; 3508 if (color) i=0; 3509 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3510 /* get back IS */ 3511 *is_sends = ranks_send_to; 3512 PetscFunctionReturn(0); 3513 } 3514 3515 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3516 3517 #undef __FUNCT__ 3518 #define __FUNCT__ "MatISSubassemble" 3519 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3520 { 3521 Mat local_mat; 3522 IS is_sends_internal; 3523 PetscInt rows,cols,new_local_rows; 3524 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3525 PetscBool ismatis,isdense,newisdense,destroy_mat; 3526 ISLocalToGlobalMapping l2gmap; 3527 PetscInt* l2gmap_indices; 3528 const PetscInt* is_indices; 3529 MatType new_local_type; 3530 /* buffers */ 3531 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3532 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3533 PetscInt *recv_buffer_idxs_local; 3534 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3535 /* MPI */ 3536 MPI_Comm comm,comm_n; 3537 PetscSubcomm subcomm; 3538 PetscMPIInt n_sends,n_recvs,commsize; 3539 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3540 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3541 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3542 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3543 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3544 PetscErrorCode ierr; 3545 3546 PetscFunctionBegin; 3547 /* TODO: add missing checks */ 3548 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3549 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3550 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3551 PetscValidLogicalCollectiveInt(mat,nis,7); 3552 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3553 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3554 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3555 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3556 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3557 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3558 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3559 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3560 PetscInt mrows,mcols,mnrows,mncols; 3561 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3562 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3563 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3564 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3565 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3566 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3567 } 3568 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3569 PetscValidLogicalCollectiveInt(mat,bs,0); 3570 /* prepare IS for sending if not provided */ 3571 if (!is_sends) { 3572 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3573 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3574 } else { 3575 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3576 is_sends_internal = is_sends; 3577 } 3578 3579 /* get comm */ 3580 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3581 3582 /* compute number of sends */ 3583 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3584 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3585 3586 /* compute number of receives */ 3587 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3588 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3589 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3590 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3591 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3592 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3593 ierr = PetscFree(iflags);CHKERRQ(ierr); 3594 3595 /* restrict comm if requested */ 3596 subcomm = 0; 3597 destroy_mat = PETSC_FALSE; 3598 if (restrict_comm) { 3599 PetscMPIInt color,subcommsize; 3600 3601 color = 0; 3602 if (restrict_full) { 3603 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3604 } else { 3605 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3606 } 3607 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3608 subcommsize = commsize - subcommsize; 3609 /* check if reuse has been requested */ 3610 if (reuse == MAT_REUSE_MATRIX) { 3611 if (*mat_n) { 3612 PetscMPIInt subcommsize2; 3613 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3614 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3615 comm_n = PetscObjectComm((PetscObject)*mat_n); 3616 } else { 3617 comm_n = PETSC_COMM_SELF; 3618 } 3619 } else { /* MAT_INITIAL_MATRIX */ 3620 PetscMPIInt rank; 3621 3622 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3623 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3624 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3625 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3626 comm_n = PetscSubcommChild(subcomm); 3627 } 3628 /* flag to destroy *mat_n if not significative */ 3629 if (color) destroy_mat = PETSC_TRUE; 3630 } else { 3631 comm_n = comm; 3632 } 3633 3634 /* prepare send/receive buffers */ 3635 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3636 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3637 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3638 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3639 if (nis) { 3640 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3641 } 3642 3643 /* Get data from local matrices */ 3644 if (!isdense) { 3645 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3646 /* TODO: See below some guidelines on how to prepare the local buffers */ 3647 /* 3648 send_buffer_vals should contain the raw values of the local matrix 3649 send_buffer_idxs should contain: 3650 - MatType_PRIVATE type 3651 - PetscInt size_of_l2gmap 3652 - PetscInt global_row_indices[size_of_l2gmap] 3653 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3654 */ 3655 } else { 3656 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3657 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3658 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3659 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3660 send_buffer_idxs[1] = i; 3661 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3662 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3663 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3664 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3665 for (i=0;i<n_sends;i++) { 3666 ilengths_vals[is_indices[i]] = len*len; 3667 ilengths_idxs[is_indices[i]] = len+2; 3668 } 3669 } 3670 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3671 /* additional is (if any) */ 3672 if (nis) { 3673 PetscMPIInt psum; 3674 PetscInt j; 3675 for (j=0,psum=0;j<nis;j++) { 3676 PetscInt plen; 3677 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3678 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3679 psum += len+1; /* indices + lenght */ 3680 } 3681 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3682 for (j=0,psum=0;j<nis;j++) { 3683 PetscInt plen; 3684 const PetscInt *is_array_idxs; 3685 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3686 send_buffer_idxs_is[psum] = plen; 3687 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3688 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3689 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3690 psum += plen+1; /* indices + lenght */ 3691 } 3692 for (i=0;i<n_sends;i++) { 3693 ilengths_idxs_is[is_indices[i]] = psum; 3694 } 3695 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3696 } 3697 3698 buf_size_idxs = 0; 3699 buf_size_vals = 0; 3700 buf_size_idxs_is = 0; 3701 for (i=0;i<n_recvs;i++) { 3702 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3703 buf_size_vals += (PetscInt)olengths_vals[i]; 3704 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3705 } 3706 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3707 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3708 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3709 3710 /* get new tags for clean communications */ 3711 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3712 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3713 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3714 3715 /* allocate for requests */ 3716 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3717 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3718 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3719 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3720 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3721 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3722 3723 /* communications */ 3724 ptr_idxs = recv_buffer_idxs; 3725 ptr_vals = recv_buffer_vals; 3726 ptr_idxs_is = recv_buffer_idxs_is; 3727 for (i=0;i<n_recvs;i++) { 3728 source_dest = onodes[i]; 3729 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3730 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3731 ptr_idxs += olengths_idxs[i]; 3732 ptr_vals += olengths_vals[i]; 3733 if (nis) { 3734 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3735 ptr_idxs_is += olengths_idxs_is[i]; 3736 } 3737 } 3738 for (i=0;i<n_sends;i++) { 3739 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3740 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3741 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3742 if (nis) { 3743 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3744 } 3745 } 3746 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3747 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3748 3749 /* assemble new l2g map */ 3750 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3751 ptr_idxs = recv_buffer_idxs; 3752 new_local_rows = 0; 3753 for (i=0;i<n_recvs;i++) { 3754 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3755 ptr_idxs += olengths_idxs[i]; 3756 } 3757 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3758 ptr_idxs = recv_buffer_idxs; 3759 new_local_rows = 0; 3760 for (i=0;i<n_recvs;i++) { 3761 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3762 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3763 ptr_idxs += olengths_idxs[i]; 3764 } 3765 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3766 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3767 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3768 3769 /* infer new local matrix type from received local matrices type */ 3770 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3771 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3772 if (n_recvs) { 3773 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3774 ptr_idxs = recv_buffer_idxs; 3775 for (i=0;i<n_recvs;i++) { 3776 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3777 new_local_type_private = MATAIJ_PRIVATE; 3778 break; 3779 } 3780 ptr_idxs += olengths_idxs[i]; 3781 } 3782 switch (new_local_type_private) { 3783 case MATDENSE_PRIVATE: 3784 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3785 new_local_type = MATSEQAIJ; 3786 bs = 1; 3787 } else { /* if I receive only 1 dense matrix */ 3788 new_local_type = MATSEQDENSE; 3789 bs = 1; 3790 } 3791 break; 3792 case MATAIJ_PRIVATE: 3793 new_local_type = MATSEQAIJ; 3794 bs = 1; 3795 break; 3796 case MATBAIJ_PRIVATE: 3797 new_local_type = MATSEQBAIJ; 3798 break; 3799 case MATSBAIJ_PRIVATE: 3800 new_local_type = MATSEQSBAIJ; 3801 break; 3802 default: 3803 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3804 break; 3805 } 3806 } else { /* by default, new_local_type is seqdense */ 3807 new_local_type = MATSEQDENSE; 3808 bs = 1; 3809 } 3810 3811 /* create MATIS object if needed */ 3812 if (reuse == MAT_INITIAL_MATRIX) { 3813 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3814 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3815 } else { 3816 /* it also destroys the local matrices */ 3817 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3818 } 3819 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3820 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3821 3822 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3823 3824 /* Global to local map of received indices */ 3825 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3826 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3827 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3828 3829 /* restore attributes -> type of incoming data and its size */ 3830 buf_size_idxs = 0; 3831 for (i=0;i<n_recvs;i++) { 3832 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3833 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3834 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3835 } 3836 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3837 3838 /* set preallocation */ 3839 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3840 if (!newisdense) { 3841 PetscInt *new_local_nnz=0; 3842 3843 ptr_vals = recv_buffer_vals; 3844 ptr_idxs = recv_buffer_idxs_local; 3845 if (n_recvs) { 3846 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3847 } 3848 for (i=0;i<n_recvs;i++) { 3849 PetscInt j; 3850 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3851 for (j=0;j<*(ptr_idxs+1);j++) { 3852 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3853 } 3854 } else { 3855 /* TODO */ 3856 } 3857 ptr_idxs += olengths_idxs[i]; 3858 } 3859 if (new_local_nnz) { 3860 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3861 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3862 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3863 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3864 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3865 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3866 } else { 3867 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3868 } 3869 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3870 } else { 3871 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3872 } 3873 3874 /* set values */ 3875 ptr_vals = recv_buffer_vals; 3876 ptr_idxs = recv_buffer_idxs_local; 3877 for (i=0;i<n_recvs;i++) { 3878 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3879 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3880 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3881 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3882 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3883 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3884 } else { 3885 /* TODO */ 3886 } 3887 ptr_idxs += olengths_idxs[i]; 3888 ptr_vals += olengths_vals[i]; 3889 } 3890 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3891 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3894 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3895 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3896 3897 #if 0 3898 if (!restrict_comm) { /* check */ 3899 Vec lvec,rvec; 3900 PetscReal infty_error; 3901 3902 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3903 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3904 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3905 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3906 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3907 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3908 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3909 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3910 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3911 } 3912 #endif 3913 3914 /* assemble new additional is (if any) */ 3915 if (nis) { 3916 PetscInt **temp_idxs,*count_is,j,psum; 3917 3918 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3919 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3920 ptr_idxs = recv_buffer_idxs_is; 3921 psum = 0; 3922 for (i=0;i<n_recvs;i++) { 3923 for (j=0;j<nis;j++) { 3924 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3925 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3926 psum += plen; 3927 ptr_idxs += plen+1; /* shift pointer to received data */ 3928 } 3929 } 3930 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3931 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3932 for (i=1;i<nis;i++) { 3933 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3934 } 3935 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3936 ptr_idxs = recv_buffer_idxs_is; 3937 for (i=0;i<n_recvs;i++) { 3938 for (j=0;j<nis;j++) { 3939 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3940 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3941 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3942 ptr_idxs += plen+1; /* shift pointer to received data */ 3943 } 3944 } 3945 for (i=0;i<nis;i++) { 3946 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3947 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3948 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3949 } 3950 ierr = PetscFree(count_is);CHKERRQ(ierr); 3951 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3952 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3953 } 3954 /* free workspace */ 3955 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3956 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3957 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3958 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3959 if (isdense) { 3960 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3961 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3962 } else { 3963 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3964 } 3965 if (nis) { 3966 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3967 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3968 } 3969 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3970 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3971 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3972 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3973 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3974 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3975 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3976 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3977 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3978 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3979 ierr = PetscFree(onodes);CHKERRQ(ierr); 3980 if (nis) { 3981 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3982 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3983 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3984 } 3985 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3986 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3987 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3988 for (i=0;i<nis;i++) { 3989 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3990 } 3991 *mat_n = NULL; 3992 } 3993 PetscFunctionReturn(0); 3994 } 3995 3996 /* temporary hack into ksp private data structure */ 3997 #include <petsc/private/kspimpl.h> 3998 3999 #undef __FUNCT__ 4000 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4001 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4002 { 4003 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4004 PC_IS *pcis = (PC_IS*)pc->data; 4005 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4006 MatNullSpace CoarseNullSpace=NULL; 4007 ISLocalToGlobalMapping coarse_islg; 4008 IS coarse_is,*isarray; 4009 PetscInt i,im_active=-1,active_procs=-1; 4010 PetscInt nis,nisdofs,nisneu; 4011 PC pc_temp; 4012 PCType coarse_pc_type; 4013 KSPType coarse_ksp_type; 4014 PetscBool multilevel_requested,multilevel_allowed; 4015 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4016 Mat t_coarse_mat_is; 4017 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4018 PetscMPIInt all_procs; 4019 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4020 PetscBool compute_vecs = PETSC_FALSE; 4021 PetscScalar *array; 4022 PetscErrorCode ierr; 4023 4024 PetscFunctionBegin; 4025 /* Assign global numbering to coarse dofs */ 4026 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4027 PetscInt ocoarse_size; 4028 compute_vecs = PETSC_TRUE; 4029 ocoarse_size = pcbddc->coarse_size; 4030 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4031 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4032 /* see if we can avoid some work */ 4033 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4034 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4035 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4036 PC pc; 4037 PetscBool isbddc; 4038 4039 /* temporary workaround since PCBDDC does not have a reset method so far */ 4040 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4041 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4042 if (isbddc) { 4043 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4044 } 4045 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4046 coarse_reuse = PETSC_FALSE; 4047 } else { /* we can safely reuse already computed coarse matrix */ 4048 coarse_reuse = PETSC_TRUE; 4049 } 4050 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4051 coarse_reuse = PETSC_FALSE; 4052 } 4053 /* reset any subassembling information */ 4054 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4055 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4056 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4057 coarse_reuse = PETSC_TRUE; 4058 } 4059 4060 /* count "active" (i.e. with positive local size) and "void" processes */ 4061 im_active = !!(pcis->n); 4062 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4063 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4064 void_procs = all_procs-active_procs; 4065 csin_type_simple = PETSC_TRUE; 4066 redist = PETSC_FALSE; 4067 if (pcbddc->current_level && void_procs) { 4068 csin_ml = PETSC_TRUE; 4069 ncoarse_ml = void_procs; 4070 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4071 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4072 csin_ds = PETSC_TRUE; 4073 ncoarse_ds = pcbddc->redistribute_coarse; 4074 redist = PETSC_TRUE; 4075 } else { 4076 csin_ds = PETSC_TRUE; 4077 ncoarse_ds = active_procs; 4078 redist = PETSC_TRUE; 4079 } 4080 } else { 4081 csin_ml = PETSC_FALSE; 4082 ncoarse_ml = all_procs; 4083 if (void_procs) { 4084 csin_ds = PETSC_TRUE; 4085 ncoarse_ds = void_procs; 4086 csin_type_simple = PETSC_FALSE; 4087 } else { 4088 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4089 csin_ds = PETSC_TRUE; 4090 ncoarse_ds = pcbddc->redistribute_coarse; 4091 redist = PETSC_TRUE; 4092 } else { 4093 csin_ds = PETSC_FALSE; 4094 ncoarse_ds = all_procs; 4095 } 4096 } 4097 } 4098 4099 /* 4100 test if we can go multilevel: three conditions must be satisfied: 4101 - we have not exceeded the number of levels requested 4102 - we can actually subassemble the active processes 4103 - we can find a suitable number of MPI processes where we can place the subassembled problem 4104 */ 4105 multilevel_allowed = PETSC_FALSE; 4106 multilevel_requested = PETSC_FALSE; 4107 if (pcbddc->current_level < pcbddc->max_levels) { 4108 multilevel_requested = PETSC_TRUE; 4109 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4110 multilevel_allowed = PETSC_FALSE; 4111 } else { 4112 multilevel_allowed = PETSC_TRUE; 4113 } 4114 } 4115 /* determine number of process partecipating to coarse solver */ 4116 if (multilevel_allowed) { 4117 ncoarse = ncoarse_ml; 4118 csin = csin_ml; 4119 redist = PETSC_FALSE; 4120 } else { 4121 ncoarse = ncoarse_ds; 4122 csin = csin_ds; 4123 } 4124 4125 /* creates temporary l2gmap and IS for coarse indexes */ 4126 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4127 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4128 4129 /* creates temporary MATIS object for coarse matrix */ 4130 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4131 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4132 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4133 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4134 #if 0 4135 { 4136 PetscViewer viewer; 4137 char filename[256]; 4138 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4139 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4140 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4141 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4142 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4143 } 4144 #endif 4145 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4146 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4147 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4148 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4150 4151 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4152 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4153 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4154 const PetscInt *idxs; 4155 ISLocalToGlobalMapping tmap; 4156 4157 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4158 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4159 /* allocate space for temporary storage */ 4160 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4161 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4162 /* allocate for IS array */ 4163 nisdofs = pcbddc->n_ISForDofsLocal; 4164 nisneu = !!pcbddc->NeumannBoundariesLocal; 4165 nis = nisdofs + nisneu; 4166 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4167 /* dofs splitting */ 4168 for (i=0;i<nisdofs;i++) { 4169 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4170 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4171 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4172 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4173 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4174 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4175 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4176 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4177 } 4178 /* neumann boundaries */ 4179 if (pcbddc->NeumannBoundariesLocal) { 4180 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4181 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4182 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4183 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4184 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4185 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4186 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4187 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4188 } 4189 /* free memory */ 4190 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4191 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4192 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4193 } else { 4194 nis = 0; 4195 nisdofs = 0; 4196 nisneu = 0; 4197 isarray = NULL; 4198 } 4199 /* destroy no longer needed map */ 4200 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4201 4202 /* restrict on coarse candidates (if needed) */ 4203 coarse_mat_is = NULL; 4204 if (csin) { 4205 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4206 if (redist) { 4207 PetscMPIInt rank; 4208 PetscInt spc,n_spc_p1,dest[1],destsize; 4209 4210 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4211 spc = active_procs/ncoarse; 4212 n_spc_p1 = active_procs%ncoarse; 4213 if (im_active) { 4214 destsize = 1; 4215 if (rank > n_spc_p1*(spc+1)-1) { 4216 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4217 } else { 4218 dest[0] = rank/(spc+1); 4219 } 4220 } else { 4221 destsize = 0; 4222 } 4223 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4224 } else if (csin_type_simple) { 4225 PetscMPIInt rank; 4226 PetscInt issize,isidx; 4227 4228 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4229 if (im_active) { 4230 issize = 1; 4231 isidx = (PetscInt)rank; 4232 } else { 4233 issize = 0; 4234 isidx = -1; 4235 } 4236 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4237 } else { /* get a suitable subassembling pattern from MATIS code */ 4238 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4239 } 4240 4241 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4242 if (!redist || ncoarse <= void_procs) { 4243 PetscInt ncoarse_cand,tissize,*nisindices; 4244 PetscInt *coarse_candidates; 4245 const PetscInt* tisindices; 4246 4247 /* get coarse candidates' ranks in pc communicator */ 4248 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4249 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4250 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4251 if (!coarse_candidates[i]) { 4252 coarse_candidates[ncoarse_cand++]=i; 4253 } 4254 } 4255 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4256 4257 4258 if (pcbddc->dbg_flag) { 4259 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4260 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4261 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4262 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4263 for (i=0;i<ncoarse_cand;i++) { 4264 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4265 } 4266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4267 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4268 } 4269 /* shift the pattern on coarse candidates */ 4270 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4271 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4272 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4273 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4274 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4275 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4276 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4277 } 4278 if (pcbddc->dbg_flag) { 4279 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4280 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4281 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4282 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4283 } 4284 } 4285 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4286 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4287 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4288 } else { /* this is the last level, so use just receiving processes in subcomm */ 4289 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4290 } 4291 } else { 4292 if (pcbddc->dbg_flag) { 4293 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4294 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4295 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4296 } 4297 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4298 coarse_mat_is = t_coarse_mat_is; 4299 } 4300 4301 /* create local to global scatters for coarse problem */ 4302 if (compute_vecs) { 4303 PetscInt lrows; 4304 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4305 if (coarse_mat_is) { 4306 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4307 } else { 4308 lrows = 0; 4309 } 4310 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4311 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4312 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4313 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4314 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4315 } 4316 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4317 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4318 4319 /* set defaults for coarse KSP and PC */ 4320 if (multilevel_allowed) { 4321 coarse_ksp_type = KSPRICHARDSON; 4322 coarse_pc_type = PCBDDC; 4323 } else { 4324 coarse_ksp_type = KSPPREONLY; 4325 coarse_pc_type = PCREDUNDANT; 4326 } 4327 4328 /* print some info if requested */ 4329 if (pcbddc->dbg_flag) { 4330 if (!multilevel_allowed) { 4331 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4332 if (multilevel_requested) { 4333 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4334 } else if (pcbddc->max_levels) { 4335 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4336 } 4337 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4338 } 4339 } 4340 4341 /* create the coarse KSP object only once with defaults */ 4342 if (coarse_mat_is) { 4343 MatReuse coarse_mat_reuse; 4344 PetscViewer dbg_viewer = NULL; 4345 if (pcbddc->dbg_flag) { 4346 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4347 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4348 } 4349 if (!pcbddc->coarse_ksp) { 4350 char prefix[256],str_level[16]; 4351 size_t len; 4352 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4353 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4354 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4355 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4356 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4357 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4358 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4359 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4360 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4361 /* prefix */ 4362 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4363 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4364 if (!pcbddc->current_level) { 4365 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4366 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4367 } else { 4368 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4369 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4370 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4371 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4372 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4373 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4374 } 4375 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4376 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4377 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4378 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4379 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4380 /* allow user customization */ 4381 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4382 } 4383 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4384 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4385 if (nisdofs) { 4386 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4387 for (i=0;i<nisdofs;i++) { 4388 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4389 } 4390 } 4391 if (nisneu) { 4392 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4393 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4394 } 4395 4396 /* get some info after set from options */ 4397 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4398 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4399 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4400 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4401 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4402 isbddc = PETSC_FALSE; 4403 } 4404 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4405 if (isredundant) { 4406 KSP inner_ksp; 4407 PC inner_pc; 4408 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4409 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4410 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4411 } 4412 4413 /* assemble coarse matrix */ 4414 if (coarse_reuse) { 4415 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4416 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4417 coarse_mat_reuse = MAT_REUSE_MATRIX; 4418 } else { 4419 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4420 } 4421 if (isbddc || isnn) { 4422 if (pcbddc->coarsening_ratio > 1) { 4423 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4424 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4425 if (pcbddc->dbg_flag) { 4426 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4427 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4428 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4429 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4430 } 4431 } 4432 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4433 } else { 4434 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4435 coarse_mat = coarse_mat_is; 4436 } 4437 } else { 4438 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4439 } 4440 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4441 4442 /* propagate symmetry info of coarse matrix */ 4443 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4444 if (pc->pmat->symmetric_set) { 4445 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4446 } 4447 if (pc->pmat->hermitian_set) { 4448 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4449 } 4450 if (pc->pmat->spd_set) { 4451 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4452 } 4453 /* set operators */ 4454 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4455 if (pcbddc->dbg_flag) { 4456 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4457 } 4458 } else { /* processes non partecipating to coarse solver (if any) */ 4459 coarse_mat = 0; 4460 } 4461 ierr = PetscFree(isarray);CHKERRQ(ierr); 4462 #if 0 4463 { 4464 PetscViewer viewer; 4465 char filename[256]; 4466 sprintf(filename,"coarse_mat.m"); 4467 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4468 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4469 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4470 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4471 } 4472 #endif 4473 4474 /* Compute coarse null space (special handling by BDDC only) */ 4475 #if 0 4476 if (pcbddc->NullSpace) { 4477 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4478 } 4479 #endif 4480 4481 if (pcbddc->coarse_ksp) { 4482 Vec crhs,csol; 4483 PetscBool ispreonly; 4484 4485 if (CoarseNullSpace) { 4486 if (isbddc) { 4487 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4488 } else { 4489 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4490 } 4491 } 4492 /* setup coarse ksp */ 4493 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4494 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4495 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4496 /* hack */ 4497 if (!csol) { 4498 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4499 } 4500 if (!crhs) { 4501 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4502 } 4503 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4504 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4505 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4506 KSP check_ksp; 4507 KSPType check_ksp_type; 4508 PC check_pc; 4509 Vec check_vec,coarse_vec; 4510 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4511 PetscInt its; 4512 PetscBool compute_eigs; 4513 PetscReal *eigs_r,*eigs_c; 4514 PetscInt neigs; 4515 const char *prefix; 4516 4517 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4518 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4519 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4520 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4521 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4522 if (ispreonly) { 4523 check_ksp_type = KSPPREONLY; 4524 compute_eigs = PETSC_FALSE; 4525 } else { 4526 check_ksp_type = KSPGMRES; 4527 compute_eigs = PETSC_TRUE; 4528 } 4529 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4530 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4531 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4532 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4533 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4534 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4535 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4536 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4537 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4538 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4539 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4540 /* create random vec */ 4541 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4542 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4543 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4544 if (CoarseNullSpace) { 4545 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4546 } 4547 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4548 /* solve coarse problem */ 4549 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4550 if (CoarseNullSpace) { 4551 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4552 } 4553 /* set eigenvalue estimation if preonly has not been requested */ 4554 if (compute_eigs) { 4555 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4556 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4557 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4558 lambda_max = eigs_r[neigs-1]; 4559 lambda_min = eigs_r[0]; 4560 if (pcbddc->use_coarse_estimates) { 4561 if (lambda_max>lambda_min) { 4562 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4563 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4564 } 4565 } 4566 } 4567 4568 /* check coarse problem residual error */ 4569 if (pcbddc->dbg_flag) { 4570 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4571 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4572 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4573 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4574 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4575 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4576 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4577 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4578 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4579 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4580 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4581 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4582 if (compute_eigs) { 4583 PetscReal lambda_max_s,lambda_min_s; 4584 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4585 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4586 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4587 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4588 for (i=0;i<neigs;i++) { 4589 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4590 } 4591 } 4592 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4593 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4594 } 4595 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4596 if (compute_eigs) { 4597 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4598 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4599 } 4600 } 4601 } 4602 /* print additional info */ 4603 if (pcbddc->dbg_flag) { 4604 /* waits until all processes reaches this point */ 4605 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4606 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4607 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4608 } 4609 4610 /* free memory */ 4611 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4612 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4613 PetscFunctionReturn(0); 4614 } 4615 4616 #undef __FUNCT__ 4617 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4618 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4619 { 4620 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4621 PC_IS* pcis = (PC_IS*)pc->data; 4622 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4623 IS subset,subset_mult,subset_n; 4624 PetscInt local_size,coarse_size=0; 4625 PetscInt *local_primal_indices=NULL; 4626 const PetscInt *t_local_primal_indices; 4627 PetscErrorCode ierr; 4628 4629 PetscFunctionBegin; 4630 /* Compute global number of coarse dofs */ 4631 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4632 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4633 } 4634 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4635 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4636 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4637 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4638 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4639 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4640 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4641 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4642 if (local_size != pcbddc->local_primal_size) { 4643 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4644 } 4645 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4646 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4647 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4648 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4649 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4650 4651 /* check numbering */ 4652 if (pcbddc->dbg_flag) { 4653 PetscScalar coarsesum,*array; 4654 PetscInt i; 4655 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4656 4657 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4660 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4661 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4662 for (i=0;i<pcbddc->local_primal_size;i++) { 4663 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4664 } 4665 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4666 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4667 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4668 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4669 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4670 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4671 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4672 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4673 for (i=0;i<pcis->n;i++) { 4674 if (array[i] == 1.0) { 4675 set_error = PETSC_TRUE; 4676 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4677 } 4678 } 4679 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4680 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4681 for (i=0;i<pcis->n;i++) { 4682 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4683 } 4684 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4685 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4686 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4687 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4688 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4689 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4690 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4691 PetscInt *gidxs; 4692 4693 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4694 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4695 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4696 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4698 for (i=0;i<pcbddc->local_primal_size;i++) { 4699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4700 } 4701 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4702 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4703 } 4704 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4705 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4706 } 4707 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4708 /* get back data */ 4709 *coarse_size_n = coarse_size; 4710 *local_primal_indices_n = local_primal_indices; 4711 PetscFunctionReturn(0); 4712 } 4713 4714 #undef __FUNCT__ 4715 #define __FUNCT__ "PCBDDCGlobalToLocal" 4716 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4717 { 4718 IS localis_t; 4719 PetscInt i,lsize,*idxs,n; 4720 PetscScalar *vals; 4721 PetscErrorCode ierr; 4722 4723 PetscFunctionBegin; 4724 /* get indices in local ordering exploiting local to global map */ 4725 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4726 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4727 for (i=0;i<lsize;i++) vals[i] = 1.0; 4728 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4729 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4730 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4731 if (idxs) { /* multilevel guard */ 4732 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4733 } 4734 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4735 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4736 ierr = PetscFree(vals);CHKERRQ(ierr); 4737 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4738 /* now compute set in local ordering */ 4739 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4740 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4741 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4742 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4743 for (i=0,lsize=0;i<n;i++) { 4744 if (PetscRealPart(vals[i]) > 0.5) { 4745 lsize++; 4746 } 4747 } 4748 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4749 for (i=0,lsize=0;i<n;i++) { 4750 if (PetscRealPart(vals[i]) > 0.5) { 4751 idxs[lsize++] = i; 4752 } 4753 } 4754 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4755 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4756 *localis = localis_t; 4757 PetscFunctionReturn(0); 4758 } 4759 4760 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4761 #undef __FUNCT__ 4762 #define __FUNCT__ "PCBDDCMatMult_Private" 4763 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4764 { 4765 PCBDDCChange_ctx change_ctx; 4766 PetscErrorCode ierr; 4767 4768 PetscFunctionBegin; 4769 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4770 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4771 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4772 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4773 PetscFunctionReturn(0); 4774 } 4775 4776 #undef __FUNCT__ 4777 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4778 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4779 { 4780 PCBDDCChange_ctx change_ctx; 4781 PetscErrorCode ierr; 4782 4783 PetscFunctionBegin; 4784 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4785 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4786 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4787 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4788 PetscFunctionReturn(0); 4789 } 4790 4791 #undef __FUNCT__ 4792 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4793 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4794 { 4795 PC_IS *pcis=(PC_IS*)pc->data; 4796 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4797 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4798 Mat S_j; 4799 PetscInt *used_xadj,*used_adjncy; 4800 PetscBool free_used_adj; 4801 PetscErrorCode ierr; 4802 4803 PetscFunctionBegin; 4804 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4805 free_used_adj = PETSC_FALSE; 4806 if (pcbddc->sub_schurs_layers == -1) { 4807 used_xadj = NULL; 4808 used_adjncy = NULL; 4809 } else { 4810 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4811 used_xadj = pcbddc->mat_graph->xadj; 4812 used_adjncy = pcbddc->mat_graph->adjncy; 4813 } else if (pcbddc->computed_rowadj) { 4814 used_xadj = pcbddc->mat_graph->xadj; 4815 used_adjncy = pcbddc->mat_graph->adjncy; 4816 } else { 4817 PetscBool flg_row=PETSC_FALSE; 4818 const PetscInt *xadj,*adjncy; 4819 PetscInt nvtxs; 4820 4821 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4822 if (flg_row) { 4823 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4824 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4825 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4826 free_used_adj = PETSC_TRUE; 4827 } else { 4828 pcbddc->sub_schurs_layers = -1; 4829 used_xadj = NULL; 4830 used_adjncy = NULL; 4831 } 4832 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4833 } 4834 } 4835 4836 /* setup sub_schurs data */ 4837 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4838 if (!sub_schurs->use_mumps) { 4839 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4840 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4841 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4842 } else { 4843 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4844 PetscBool isseqaij; 4845 if (!pcbddc->use_vertices && reuse_solvers) { 4846 PetscInt n_vertices; 4847 4848 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4849 reuse_solvers = (PetscBool)!n_vertices; 4850 } 4851 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4852 if (!isseqaij) { 4853 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4854 if (matis->A == pcbddc->local_mat) { 4855 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4856 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4857 } else { 4858 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4859 } 4860 } 4861 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4862 } 4863 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4864 4865 /* free adjacency */ 4866 if (free_used_adj) { 4867 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4868 } 4869 PetscFunctionReturn(0); 4870 } 4871 4872 #undef __FUNCT__ 4873 #define __FUNCT__ "PCBDDCInitSubSchurs" 4874 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4875 { 4876 PC_IS *pcis=(PC_IS*)pc->data; 4877 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4878 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4879 PCBDDCGraph graph; 4880 PetscErrorCode ierr; 4881 4882 PetscFunctionBegin; 4883 /* attach interface graph for determining subsets */ 4884 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4885 IS verticesIS,verticescomm; 4886 PetscInt vsize,*idxs; 4887 4888 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4889 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4890 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4891 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4892 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4893 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4894 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4895 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4896 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4897 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4898 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4899 /* 4900 if (pcbddc->dbg_flag) { 4901 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4902 } 4903 */ 4904 } else { 4905 graph = pcbddc->mat_graph; 4906 } 4907 4908 /* sub_schurs init */ 4909 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4910 4911 /* free graph struct */ 4912 if (pcbddc->sub_schurs_rebuild) { 4913 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4914 } 4915 PetscFunctionReturn(0); 4916 } 4917