1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 #if defined(PETSC_USE_COMPLEX) 92 rwork = NULL; 93 #endif 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 155 const PetscInt *idxs; 156 PetscReal infty = PETSC_MAX_REAL; 157 PetscInt j,subset_size,eigs_start = 0; 158 PetscBLASInt B_N; 159 PetscBool same_data = PETSC_FALSE; 160 161 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 162 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 163 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 164 if (sub_schurs->is_hermitian) { 165 PetscInt j,k; 166 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 167 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 168 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 for (k=j;k<subset_size;k++) { 172 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 173 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 174 } 175 } 176 } else { 177 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 178 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 179 } 180 } else { 181 S = Sarray + cumarray; 182 St = Starray + cumarray; 183 } 184 185 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 186 /* see if we can save some work */ 187 if (sub_schurs->n_subs == 1) { 188 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 189 } 190 191 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 192 B_neigs = 0; 193 } else { 194 /* Threshold: this is an heuristic for edges */ 195 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 196 197 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 198 PetscBLASInt B_itype = 1; 199 PetscBLASInt B_IL, B_IU; 200 PetscReal eps = -1.0; /* dlamch? */ 201 PetscInt nmin_s; 202 203 /* ask for eigenvalues larger than thresh */ 204 if (pcbddc->dbg_flag) { 205 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 206 } 207 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 208 #if defined(PETSC_USE_COMPLEX) 209 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 210 #else 211 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 212 #endif 213 ierr = PetscFPTrapPop();CHKERRQ(ierr); 214 if (B_ierr) { 215 if (B_ierr < 0 ) { 216 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 217 } else if (B_ierr <= B_N) { 218 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 219 } else { 220 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 221 } 222 } 223 224 if (B_neigs > nmax) { 225 if (pcbddc->dbg_flag) { 226 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 227 } 228 eigs_start = B_neigs -nmax; 229 B_neigs = nmax; 230 } 231 232 nmin_s = PetscMin(nmin,B_N); 233 if (B_neigs < nmin_s) { 234 PetscBLASInt B_neigs2; 235 236 B_IU = B_N - B_neigs; 237 B_IL = B_N - nmin_s + 1; 238 if (pcbddc->dbg_flag) { 239 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 240 } 241 if (sub_schurs->is_hermitian) { 242 PetscInt j; 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 for (j=0;j<subset_size;j++) { 247 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 248 } 249 } else { 250 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 251 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 } 253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 254 #if defined(PETSC_USE_COMPLEX) 255 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 256 #else 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 258 #endif 259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 260 B_neigs += B_neigs2; 261 } 262 if (B_ierr) { 263 if (B_ierr < 0 ) { 264 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 265 } else if (B_ierr <= B_N) { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 267 } else { 268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 269 } 270 } 271 if (pcbddc->dbg_flag) { 272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 273 for (j=0;j<B_neigs;j++) { 274 if (eigs[j] == 0.0) { 275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 276 } else { 277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 278 } 279 } 280 } 281 } else { 282 /* TODO */ 283 } 284 } 285 maxneigs = PetscMax(B_neigs,maxneigs); 286 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 287 if (B_neigs) { 288 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 289 290 if (pcbddc->dbg_flag > 1) { 291 PetscInt ii; 292 for (ii=0;ii<B_neigs;ii++) { 293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 294 for (j=0;j<B_N;j++) { 295 #if defined(PETSC_USE_COMPLEX) 296 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 297 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 298 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 299 #else 300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 301 #endif 302 } 303 } 304 } 305 #if 0 306 for (j=0;j<B_neigs;j++) { 307 PetscBLASInt Blas_N,Blas_one = 1.0; 308 PetscScalar norm; 309 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 310 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 311 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 312 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 313 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 314 } else { 315 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 316 } 317 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 318 } 319 #endif 320 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 321 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 322 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 323 cum++; 324 } 325 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 326 /* shift for next computation */ 327 cumarray += subset_size*subset_size; 328 } 329 if (pcbddc->dbg_flag) { 330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 331 } 332 333 if (mss) { 334 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 335 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 336 /* destroy matrices (junk) */ 337 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 338 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 339 } 340 if (allocated_S_St) { 341 ierr = PetscFree2(S,St);CHKERRQ(ierr); 342 } 343 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 344 #if defined(PETSC_USE_COMPLEX) 345 ierr = PetscFree(rwork);CHKERRQ(ierr); 346 #endif 347 if (pcbddc->dbg_flag) { 348 PetscInt maxneigs_r; 349 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 350 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 351 } 352 PetscFunctionReturn(0); 353 } 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCBDDCSetUpSolvers" 357 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 358 { 359 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 360 PetscScalar *coarse_submat_vals; 361 PetscErrorCode ierr; 362 363 PetscFunctionBegin; 364 /* Setup local scatters R_to_B and (optionally) R_to_D */ 365 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 366 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 367 368 /* Setup local neumann solver ksp_R */ 369 /* PCBDDCSetUpLocalScatters should be called first! */ 370 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 371 372 /* Change global null space passed in by the user if change of basis has been requested */ 373 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 374 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 375 } 376 377 /* 378 Setup local correction and local part of coarse basis. 379 Gives back the dense local part of the coarse matrix in column major ordering 380 */ 381 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 382 383 /* Compute total number of coarse nodes and setup coarse solver */ 384 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 385 386 /* free */ 387 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "PCBDDCResetCustomization" 393 PetscErrorCode PCBDDCResetCustomization(PC pc) 394 { 395 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 401 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 402 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 403 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 405 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 406 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 407 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 408 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 409 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetTopography" 415 PetscErrorCode PCBDDCResetTopography(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 422 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 423 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 424 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 425 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCResetSolvers" 431 PetscErrorCode PCBDDCResetSolvers(PC pc) 432 { 433 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *array; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 439 if (pcbddc->coarse_phi_B) { 440 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 441 ierr = PetscFree(array);CHKERRQ(ierr); 442 } 443 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 445 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 447 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 449 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 450 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 451 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 452 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 454 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 455 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 456 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 457 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 458 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 459 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 461 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 462 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 463 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 464 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 465 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 471 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 472 { 473 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 474 PC_IS *pcis = (PC_IS*)pc->data; 475 VecType impVecType; 476 PetscInt n_constraints,n_R,old_size; 477 PetscErrorCode ierr; 478 479 PetscFunctionBegin; 480 if (!pcbddc->ConstraintMatrix) { 481 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 482 } 483 /* get sizes */ 484 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 485 n_R = pcis->n-pcbddc->n_vertices; 486 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 487 /* local work vectors (try to avoid unneeded work)*/ 488 /* R nodes */ 489 old_size = -1; 490 if (pcbddc->vec1_R) { 491 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 492 } 493 if (n_R != old_size) { 494 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 495 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 496 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 497 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 498 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 499 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 500 } 501 /* local primal dofs */ 502 old_size = -1; 503 if (pcbddc->vec1_P) { 504 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 505 } 506 if (pcbddc->local_primal_size != old_size) { 507 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 508 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 509 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 510 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 511 } 512 /* local explicit constraints */ 513 old_size = -1; 514 if (pcbddc->vec1_C) { 515 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 516 } 517 if (n_constraints && n_constraints != old_size) { 518 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 519 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 520 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 521 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 522 } 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "PCBDDCSetUpCorrection" 528 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 529 { 530 PetscErrorCode ierr; 531 /* pointers to pcis and pcbddc */ 532 PC_IS* pcis = (PC_IS*)pc->data; 533 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 534 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 535 /* submatrices of local problem */ 536 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 537 /* submatrices of local coarse problem */ 538 Mat S_VV,S_CV,S_VC,S_CC; 539 /* working matrices */ 540 Mat C_CR; 541 /* additional working stuff */ 542 PC pc_R; 543 Mat F; 544 PetscBool isLU,isCHOL,isILU; 545 546 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 547 PetscScalar *work; 548 PetscInt *idx_V_B; 549 PetscInt n,n_vertices,n_constraints; 550 PetscInt i,n_R,n_D,n_B; 551 PetscBool unsymmetric_check; 552 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 553 MatType impMatType; 554 /* some shortcuts to scalars */ 555 PetscScalar one=1.0,m_one=-1.0; 556 557 PetscFunctionBegin; 558 n_vertices = pcbddc->n_vertices; 559 n_constraints = pcbddc->local_primal_size-n_vertices; 560 /* Set Non-overlapping dimensions */ 561 n_B = pcis->n_B; 562 n_D = pcis->n - n_B; 563 n_R = pcis->n - n_vertices; 564 565 /* Set types for local objects needed by BDDC precondtioner */ 566 impMatType = MATSEQDENSE; 567 568 /* vertices in boundary numbering */ 569 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 570 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 571 if (i != n_vertices) { 572 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 573 } 574 575 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 576 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 577 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 578 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 580 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 581 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 582 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 583 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 584 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 585 586 unsymmetric_check = PETSC_FALSE; 587 /* allocate workspace */ 588 n = 0; 589 if (n_constraints) { 590 n += n_R*n_constraints; 591 } 592 if (n_vertices) { 593 n = PetscMax(2*n_R*n_vertices,n); 594 n = PetscMax((n_R+n_B)*n_vertices,n); 595 } 596 if (!pcbddc->symmetric_primal) { 597 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 598 unsymmetric_check = PETSC_TRUE; 599 } 600 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 601 602 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 603 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 604 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 605 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 606 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 607 if (isLU || isILU || isCHOL) { 608 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 609 } else if (sub_schurs->reuse_mumps) { 610 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 611 MatFactorType type; 612 613 F = reuse_mumps->F; 614 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 615 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 616 } else { 617 F = NULL; 618 } 619 620 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 621 if (n_constraints) { 622 Mat M1,M2,M3; 623 Mat auxmat; 624 IS is_aux; 625 PetscScalar *array,*array2; 626 627 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 628 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 629 630 /* Extract constraints on R nodes: C_{CR} */ 631 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 632 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 633 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 634 635 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 636 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 637 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 638 for (i=0;i<n_constraints;i++) { 639 const PetscScalar *row_cmat_values; 640 const PetscInt *row_cmat_indices; 641 PetscInt size_of_constraint,j; 642 643 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 644 for (j=0;j<size_of_constraint;j++) { 645 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 646 } 647 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 648 } 649 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 650 if (F) { 651 Mat B; 652 653 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 654 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 655 ierr = MatDestroy(&B);CHKERRQ(ierr); 656 } else { 657 PetscScalar *marr; 658 659 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 660 for (i=0;i<n_constraints;i++) { 661 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 662 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 663 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 664 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 665 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 666 } 667 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 668 } 669 if (!pcbddc->switch_static) { 670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 671 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 672 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 673 for (i=0;i<n_constraints;i++) { 674 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 675 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 676 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 677 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 678 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 679 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 680 } 681 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 682 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 683 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 684 } else { 685 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 686 pcbddc->local_auxmat2 = local_auxmat2_R; 687 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 688 } 689 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 690 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 691 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 692 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 693 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 694 if (isCHOL) { 695 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 696 } else { 697 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 698 } 699 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 700 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 701 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 702 ierr = MatDestroy(&M2);CHKERRQ(ierr); 703 ierr = MatDestroy(&M3);CHKERRQ(ierr); 704 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 705 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 706 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 707 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 708 ierr = MatDestroy(&M1);CHKERRQ(ierr); 709 } 710 /* Get submatrices from subdomain matrix */ 711 if (n_vertices) { 712 IS is_aux; 713 714 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 715 IS tis; 716 717 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 718 ierr = ISSort(tis);CHKERRQ(ierr); 719 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 720 ierr = ISDestroy(&tis);CHKERRQ(ierr); 721 } else { 722 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 723 } 724 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 725 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 726 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 727 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 728 } 729 730 /* Matrix of coarse basis functions (local) */ 731 if (pcbddc->coarse_phi_B) { 732 PetscInt on_B,on_primal,on_D=n_D; 733 if (pcbddc->coarse_phi_D) { 734 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 735 } 736 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 737 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 738 PetscScalar *marray; 739 740 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 741 ierr = PetscFree(marray);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 743 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 744 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 745 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 746 } 747 } 748 749 if (!pcbddc->coarse_phi_B) { 750 PetscScalar *marray; 751 752 n = n_B*pcbddc->local_primal_size; 753 if (pcbddc->switch_static || pcbddc->dbg_flag) { 754 n += n_D*pcbddc->local_primal_size; 755 } 756 if (!pcbddc->symmetric_primal) { 757 n *= 2; 758 } 759 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 760 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 761 n = n_B*pcbddc->local_primal_size; 762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 764 n += n_D*pcbddc->local_primal_size; 765 } 766 if (!pcbddc->symmetric_primal) { 767 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 768 if (pcbddc->switch_static || pcbddc->dbg_flag) { 769 n = n_B*pcbddc->local_primal_size; 770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 771 } 772 } else { 773 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 774 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 775 if (pcbddc->switch_static || pcbddc->dbg_flag) { 776 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 777 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 778 } 779 } 780 } 781 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 782 /* vertices */ 783 if (n_vertices) { 784 785 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 786 787 if (n_R) { 788 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 789 PetscBLASInt B_N,B_one = 1; 790 PetscScalar *x,*y; 791 PetscBool isseqaij; 792 793 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 794 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 796 if (F) { /* TODO could be optimized for symmetric problems */ 797 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 798 } else { 799 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 800 for (i=0;i<n_vertices;i++) { 801 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 802 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 803 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 804 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 805 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 806 } 807 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 808 } 809 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 810 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 811 if (n_constraints) { 812 Mat B; 813 814 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 815 for (i=0;i<n_vertices;i++) { 816 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 817 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 818 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 819 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 820 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 821 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 822 } 823 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 824 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 825 ierr = MatDestroy(&B);CHKERRQ(ierr); 826 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 827 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 828 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 829 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 830 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 831 ierr = MatDestroy(&B);CHKERRQ(ierr); 832 } 833 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 834 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 835 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 836 } 837 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 838 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 839 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 840 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 841 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 842 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 843 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 844 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 845 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 846 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 847 } else { 848 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 849 } 850 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 851 /* coarse basis functions */ 852 for (i=0;i<n_vertices;i++) { 853 PetscScalar *y; 854 855 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 856 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 857 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 858 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 y[n_B*i+idx_V_B[i]] = 1.0; 861 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 862 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 863 864 if (pcbddc->switch_static || pcbddc->dbg_flag) { 865 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 866 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 867 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 870 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 871 } 872 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 873 } 874 /* if n_R == 0 the object is not destroyed */ 875 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 876 } 877 878 if (n_constraints) { 879 Mat B; 880 881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 882 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 883 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 884 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 885 if (n_vertices) { 886 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 887 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 888 } else { 889 Mat S_VCt; 890 891 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 892 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 893 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 894 } 895 } 896 ierr = MatDestroy(&B);CHKERRQ(ierr); 897 /* coarse basis functions */ 898 for (i=0;i<n_constraints;i++) { 899 PetscScalar *y; 900 901 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 902 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 903 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 904 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 907 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 908 if (pcbddc->switch_static || pcbddc->dbg_flag) { 909 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 910 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 911 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 912 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 914 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 915 } 916 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 917 } 918 } 919 if (n_constraints) { 920 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 921 } 922 923 /* compute other basis functions for non-symmetric problems */ 924 if (!pcbddc->symmetric_primal) { 925 926 if (n_constraints) { 927 Mat S_CCT,B_C; 928 929 /* this is a lazy thing */ 930 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 931 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 932 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 933 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 934 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 935 if (n_vertices) { 936 Mat B_V,S_VCT; 937 938 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 939 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 940 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 941 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 942 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 943 } 944 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 945 } else { /* if there are no constraints, reset work */ 946 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 947 } 948 if (n_vertices && n_R) { 949 Mat A_VRT; 950 PetscScalar *marray; 951 PetscBLASInt B_N,B_one = 1; 952 953 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 954 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 955 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 956 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 957 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 958 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 959 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 960 } 961 962 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 963 for (i=0;i<pcbddc->local_primal_size;i++) { 964 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 965 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 966 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 967 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 968 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 969 } 970 } else { 971 for (i=0;i<pcbddc->local_primal_size;i++) { 972 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 973 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 974 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 975 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 976 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 977 } 978 } 979 /* coarse basis functions */ 980 for (i=0;i<pcbddc->local_primal_size;i++) { 981 PetscScalar *y; 982 983 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 984 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 985 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 986 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 988 if (i<n_vertices) { 989 y[n_B*i+idx_V_B[i]] = 1.0; 990 } 991 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 992 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 993 994 if (pcbddc->switch_static || pcbddc->dbg_flag) { 995 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 996 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 997 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1000 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1001 } 1002 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1003 } 1004 } 1005 /* free memory */ 1006 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1007 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1008 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1009 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1011 ierr = PetscFree(work);CHKERRQ(ierr); 1012 if (n_vertices) { 1013 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1014 } 1015 if (n_constraints) { 1016 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1017 } 1018 /* Checking coarse_sub_mat and coarse basis functios */ 1019 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1020 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1021 if (pcbddc->dbg_flag) { 1022 Mat coarse_sub_mat; 1023 Mat AUXMAT,TM1,TM2,TM3,TM4; 1024 Mat coarse_phi_D,coarse_phi_B; 1025 Mat coarse_psi_D,coarse_psi_B; 1026 Mat A_II,A_BB,A_IB,A_BI; 1027 Mat C_B,CPHI; 1028 IS is_dummy; 1029 Vec mones; 1030 MatType checkmattype=MATSEQAIJ; 1031 PetscReal real_value; 1032 1033 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1034 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1035 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1036 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1037 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1038 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1039 if (unsymmetric_check) { 1040 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1041 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1042 } 1043 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1044 1045 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1046 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1047 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1048 if (unsymmetric_check) { 1049 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1050 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1051 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 } else { 1062 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1063 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1064 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1066 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1067 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1070 } 1071 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1072 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1073 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1074 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1077 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1079 1080 /* check constraints */ 1081 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1082 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1083 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1084 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1085 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1086 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1087 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1089 if (unsymmetric_check) { 1090 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1091 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1092 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1093 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1095 } 1096 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1097 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1098 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1099 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1100 1101 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1102 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1103 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1104 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1105 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1106 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1107 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1108 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1109 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1110 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1111 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1112 if (unsymmetric_check) { 1113 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1114 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1115 } 1116 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1117 } 1118 /* get back data */ 1119 *coarse_submat_vals_n = coarse_submat_vals; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1125 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1126 { 1127 Mat *work_mat; 1128 IS isrow_s,iscol_s; 1129 PetscBool rsorted,csorted; 1130 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1131 PetscErrorCode ierr; 1132 1133 PetscFunctionBegin; 1134 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1135 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1136 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1137 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1138 1139 if (!rsorted) { 1140 const PetscInt *idxs; 1141 PetscInt *idxs_sorted,i; 1142 1143 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1144 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1145 for (i=0;i<rsize;i++) { 1146 idxs_perm_r[i] = i; 1147 } 1148 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1149 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1150 for (i=0;i<rsize;i++) { 1151 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1152 } 1153 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1154 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1155 } else { 1156 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1157 isrow_s = isrow; 1158 } 1159 1160 if (!csorted) { 1161 if (isrow == iscol) { 1162 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1163 iscol_s = isrow_s; 1164 } else { 1165 const PetscInt *idxs; 1166 PetscInt *idxs_sorted,i; 1167 1168 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1169 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1170 for (i=0;i<csize;i++) { 1171 idxs_perm_c[i] = i; 1172 } 1173 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1174 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1175 for (i=0;i<csize;i++) { 1176 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1177 } 1178 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1179 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1180 } 1181 } else { 1182 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1183 iscol_s = iscol; 1184 } 1185 1186 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1187 1188 if (!rsorted || !csorted) { 1189 Mat new_mat; 1190 IS is_perm_r,is_perm_c; 1191 1192 if (!rsorted) { 1193 PetscInt *idxs_r,i; 1194 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1195 for (i=0;i<rsize;i++) { 1196 idxs_r[idxs_perm_r[i]] = i; 1197 } 1198 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1199 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1200 } else { 1201 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1202 } 1203 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1204 1205 if (!csorted) { 1206 if (isrow_s == iscol_s) { 1207 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1208 is_perm_c = is_perm_r; 1209 } else { 1210 PetscInt *idxs_c,i; 1211 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1212 for (i=0;i<csize;i++) { 1213 idxs_c[idxs_perm_c[i]] = i; 1214 } 1215 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1216 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1217 } 1218 } else { 1219 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1220 } 1221 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1222 1223 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1224 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1225 work_mat[0] = new_mat; 1226 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1227 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1228 } 1229 1230 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1231 *B = work_mat[0]; 1232 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1233 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1234 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1240 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1241 { 1242 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1243 PC_IS* pcis = (PC_IS*)pc->data; 1244 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1245 Mat new_mat; 1246 IS is_local,is_global; 1247 PetscInt local_size; 1248 PetscBool isseqaij; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1253 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1254 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1255 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1256 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1257 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1258 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1259 1260 /* check */ 1261 if (pcbddc->dbg_flag) { 1262 Vec x,x_change; 1263 PetscReal error; 1264 1265 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1266 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1267 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1268 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1271 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1272 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1273 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1274 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1275 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1277 ierr = VecDestroy(&x);CHKERRQ(ierr); 1278 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1279 } 1280 1281 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1282 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1283 if (isseqaij) { 1284 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1285 } else { 1286 Mat work_mat; 1287 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1288 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1289 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1290 } 1291 if (matis->A->symmetric_set) { 1292 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1293 #if !defined(PETSC_USE_COMPLEX) 1294 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1295 #endif 1296 } 1297 /* 1298 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1299 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1300 */ 1301 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1307 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1308 { 1309 PC_IS* pcis = (PC_IS*)(pc->data); 1310 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1311 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1312 PetscInt *idx_R_local=NULL; 1313 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1314 PetscInt vbs,bs; 1315 PetscBT bitmask=NULL; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 /* 1320 No need to setup local scatters if 1321 - primal space is unchanged 1322 AND 1323 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1324 AND 1325 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1326 */ 1327 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1328 PetscFunctionReturn(0); 1329 } 1330 /* destroy old objects */ 1331 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1332 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1333 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1334 /* Set Non-overlapping dimensions */ 1335 n_B = pcis->n_B; 1336 n_D = pcis->n - n_B; 1337 n_vertices = pcbddc->n_vertices; 1338 1339 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1340 1341 /* create auxiliary bitmask and allocate workspace */ 1342 if (!sub_schurs->reuse_mumps) { 1343 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1344 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1345 for (i=0;i<n_vertices;i++) { 1346 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1347 } 1348 1349 for (i=0, n_R=0; i<pcis->n; i++) { 1350 if (!PetscBTLookup(bitmask,i)) { 1351 idx_R_local[n_R++] = i; 1352 } 1353 } 1354 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1355 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1356 1357 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1358 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1359 } 1360 1361 /* Block code */ 1362 vbs = 1; 1363 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1364 if (bs>1 && !(n_vertices%bs)) { 1365 PetscBool is_blocked = PETSC_TRUE; 1366 PetscInt *vary; 1367 if (!sub_schurs->reuse_mumps) { 1368 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1369 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1370 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1371 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1372 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1373 for (i=0; i<pcis->n/bs; i++) { 1374 if (vary[i]!=0 && vary[i]!=bs) { 1375 is_blocked = PETSC_FALSE; 1376 break; 1377 } 1378 } 1379 ierr = PetscFree(vary);CHKERRQ(ierr); 1380 } else { 1381 /* Verify directly the R set */ 1382 for (i=0; i<n_R/bs; i++) { 1383 PetscInt j,node=idx_R_local[bs*i]; 1384 for (j=1; j<bs; j++) { 1385 if (node != idx_R_local[bs*i+j]-j) { 1386 is_blocked = PETSC_FALSE; 1387 break; 1388 } 1389 } 1390 } 1391 } 1392 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1393 vbs = bs; 1394 for (i=0;i<n_R/vbs;i++) { 1395 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1396 } 1397 } 1398 } 1399 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1400 if (sub_schurs->reuse_mumps) { 1401 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1402 1403 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1404 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1405 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1406 reuse_mumps->is_R = pcbddc->is_R_local; 1407 } else { 1408 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1409 } 1410 1411 /* print some info if requested */ 1412 if (pcbddc->dbg_flag) { 1413 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1414 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1419 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1420 } 1421 1422 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1423 if (!sub_schurs->reuse_mumps) { 1424 IS is_aux1,is_aux2; 1425 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1426 1427 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1428 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1429 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1430 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1431 for (i=0; i<n_D; i++) { 1432 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1433 } 1434 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1435 for (i=0, j=0; i<n_R; i++) { 1436 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1437 aux_array1[j++] = i; 1438 } 1439 } 1440 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1441 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1442 for (i=0, j=0; i<n_B; i++) { 1443 if (!PetscBTLookup(bitmask,is_indices[i])) { 1444 aux_array2[j++] = i; 1445 } 1446 } 1447 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1448 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1449 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1450 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1451 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1452 1453 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1454 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1455 for (i=0, j=0; i<n_R; i++) { 1456 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1457 aux_array1[j++] = i; 1458 } 1459 } 1460 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1461 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1462 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1463 } 1464 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1465 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1466 } else { 1467 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1468 IS tis; 1469 PetscInt schur_size; 1470 1471 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1472 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1473 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1474 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1476 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1477 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1478 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1479 } 1480 } 1481 PetscFunctionReturn(0); 1482 } 1483 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1487 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1488 { 1489 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1490 PC_IS *pcis = (PC_IS*)pc->data; 1491 PC pc_temp; 1492 Mat A_RR; 1493 MatReuse reuse; 1494 PetscScalar m_one = -1.0; 1495 PetscReal value; 1496 PetscInt n_D,n_R; 1497 PetscBool use_exact,use_exact_reduced,issbaij; 1498 PetscErrorCode ierr; 1499 /* prefixes stuff */ 1500 char dir_prefix[256],neu_prefix[256],str_level[16]; 1501 size_t len; 1502 1503 PetscFunctionBegin; 1504 1505 /* compute prefixes */ 1506 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1507 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1508 if (!pcbddc->current_level) { 1509 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1510 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1511 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1512 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1513 } else { 1514 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1515 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1516 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1517 len -= 15; /* remove "pc_bddc_coarse_" */ 1518 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1519 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1520 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1521 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1522 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1523 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1524 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1525 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1526 } 1527 1528 /* DIRICHLET PROBLEM */ 1529 if (dirichlet) { 1530 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1531 if (pcbddc->local_mat->symmetric_set) { 1532 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1533 } 1534 /* Matrix for Dirichlet problem is pcis->A_II */ 1535 n_D = pcis->n - pcis->n_B; 1536 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1537 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1538 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1539 /* default */ 1540 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1541 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1542 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1543 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1544 if (issbaij) { 1545 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1546 } else { 1547 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1548 } 1549 /* Allow user's customization */ 1550 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1551 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1552 } 1553 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1554 if (sub_schurs->reuse_mumps) { 1555 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1556 1557 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1558 } 1559 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1560 if (!n_D) { 1561 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1562 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1563 } 1564 /* Set Up KSP for Dirichlet problem of BDDC */ 1565 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1566 /* set ksp_D into pcis data */ 1567 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1568 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1569 pcis->ksp_D = pcbddc->ksp_D; 1570 } 1571 1572 /* NEUMANN PROBLEM */ 1573 A_RR = 0; 1574 if (neumann) { 1575 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1576 PetscInt ibs,mbs; 1577 PetscBool issbaij; 1578 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1579 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1580 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1581 if (pcbddc->ksp_R) { /* already created ksp */ 1582 PetscInt nn_R; 1583 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1584 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1585 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1586 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1587 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1588 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1589 reuse = MAT_INITIAL_MATRIX; 1590 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1591 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1592 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1593 reuse = MAT_INITIAL_MATRIX; 1594 } else { /* safe to reuse the matrix */ 1595 reuse = MAT_REUSE_MATRIX; 1596 } 1597 } 1598 /* last check */ 1599 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1600 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1601 reuse = MAT_INITIAL_MATRIX; 1602 } 1603 } else { /* first time, so we need to create the matrix */ 1604 reuse = MAT_INITIAL_MATRIX; 1605 } 1606 /* extract A_RR */ 1607 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1608 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1609 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1610 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1611 if (matis->A == pcbddc->local_mat) { 1612 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1613 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1614 } else { 1615 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1616 } 1617 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1618 if (matis->A == pcbddc->local_mat) { 1619 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1620 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1621 } else { 1622 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1623 } 1624 } 1625 if (!sub_schurs->reuse_mumps) { 1626 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1627 if (pcbddc->local_mat->symmetric_set) { 1628 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1629 } 1630 } else { 1631 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1632 1633 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1634 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1635 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1636 } 1637 if (!pcbddc->ksp_R) { /* create object if not present */ 1638 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1639 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1640 /* default */ 1641 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1642 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1643 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1644 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1645 if (issbaij) { 1646 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1647 } else { 1648 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1649 } 1650 /* Allow user's customization */ 1651 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1652 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1653 } 1654 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1655 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1656 if (!n_R) { 1657 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1658 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1659 } 1660 /* Reuse MUMPS solver if it is present */ 1661 if (sub_schurs->reuse_mumps) { 1662 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1663 1664 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1665 } 1666 /* Set Up KSP for Neumann problem of BDDC */ 1667 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1668 } 1669 /* free Neumann problem's matrix */ 1670 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1671 1672 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1673 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1674 if (pcbddc->dbg_flag) { 1675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1676 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1677 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1678 } 1679 if (dirichlet) { /* Dirichlet */ 1680 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1681 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1682 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1683 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1684 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1685 /* need to be adapted? */ 1686 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1687 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1688 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1689 /* print info */ 1690 if (pcbddc->dbg_flag) { 1691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1693 } 1694 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1695 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1696 } 1697 } 1698 if (neumann) { /* Neumann */ 1699 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1700 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1701 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1702 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1703 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1704 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1705 /* need to be adapted? */ 1706 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1707 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1708 /* print info */ 1709 if (pcbddc->dbg_flag) { 1710 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1711 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1712 } 1713 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1714 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1715 } 1716 } 1717 } 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1723 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1724 { 1725 PetscErrorCode ierr; 1726 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1727 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1728 1729 PetscFunctionBegin; 1730 if (!sub_schurs->reuse_mumps) { 1731 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1732 } 1733 if (!pcbddc->switch_static) { 1734 if (applytranspose && pcbddc->local_auxmat1) { 1735 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1736 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1737 } 1738 if (!sub_schurs->reuse_mumps) { 1739 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1740 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1741 } else { 1742 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1743 1744 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1745 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1746 } 1747 } else { 1748 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1749 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1750 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1751 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1752 if (applytranspose && pcbddc->local_auxmat1) { 1753 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1754 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1755 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1756 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1757 } 1758 } 1759 if (!sub_schurs->reuse_mumps) { 1760 if (applytranspose) { 1761 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1762 } else { 1763 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1764 } 1765 #if defined(PETSC_HAVE_MUMPS) 1766 } else { 1767 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1768 1769 if (applytranspose) { 1770 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1771 } else { 1772 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1773 } 1774 #endif 1775 } 1776 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1777 if (!pcbddc->switch_static) { 1778 if (!sub_schurs->reuse_mumps) { 1779 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1781 } else { 1782 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1783 1784 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1785 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1786 } 1787 if (!applytranspose && pcbddc->local_auxmat1) { 1788 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1789 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1790 } 1791 } else { 1792 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1795 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1796 if (!applytranspose && pcbddc->local_auxmat1) { 1797 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1798 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1799 } 1800 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1802 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1803 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1804 } 1805 PetscFunctionReturn(0); 1806 } 1807 1808 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1811 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1812 { 1813 PetscErrorCode ierr; 1814 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1815 PC_IS* pcis = (PC_IS*) (pc->data); 1816 const PetscScalar zero = 0.0; 1817 1818 PetscFunctionBegin; 1819 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1820 if (applytranspose) { 1821 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1822 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1823 } else { 1824 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1825 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1826 } 1827 /* start communications from local primal nodes to rhs of coarse solver */ 1828 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1829 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1830 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1831 1832 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1833 /* TODO remove null space when doing multilevel */ 1834 if (pcbddc->coarse_ksp) { 1835 Vec rhs,sol; 1836 1837 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1838 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1839 if (applytranspose) { 1840 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1841 } else { 1842 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1843 } 1844 } 1845 1846 /* Local solution on R nodes */ 1847 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1848 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1849 } 1850 1851 /* communications from coarse sol to local primal nodes */ 1852 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1853 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1854 1855 /* Sum contributions from two levels */ 1856 if (applytranspose) { 1857 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1858 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1859 } else { 1860 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1861 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1868 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1869 { 1870 PetscErrorCode ierr; 1871 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1872 PetscScalar *array; 1873 Vec from,to; 1874 1875 PetscFunctionBegin; 1876 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1877 from = pcbddc->coarse_vec; 1878 to = pcbddc->vec1_P; 1879 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1880 Vec tvec; 1881 1882 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1883 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1884 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1885 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1886 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1887 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1888 } 1889 } else { /* from local to global -> put data in coarse right hand side */ 1890 from = pcbddc->vec1_P; 1891 to = pcbddc->coarse_vec; 1892 } 1893 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1899 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1900 { 1901 PetscErrorCode ierr; 1902 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1903 PetscScalar *array; 1904 Vec from,to; 1905 1906 PetscFunctionBegin; 1907 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1908 from = pcbddc->coarse_vec; 1909 to = pcbddc->vec1_P; 1910 } else { /* from local to global -> put data in coarse right hand side */ 1911 from = pcbddc->vec1_P; 1912 to = pcbddc->coarse_vec; 1913 } 1914 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1915 if (smode == SCATTER_FORWARD) { 1916 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1917 Vec tvec; 1918 1919 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1920 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1921 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1922 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1923 } 1924 } else { 1925 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1926 ierr = VecResetArray(from);CHKERRQ(ierr); 1927 } 1928 } 1929 PetscFunctionReturn(0); 1930 } 1931 1932 /* uncomment for testing purposes */ 1933 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1936 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1937 { 1938 PetscErrorCode ierr; 1939 PC_IS* pcis = (PC_IS*)(pc->data); 1940 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1941 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1942 /* one and zero */ 1943 PetscScalar one=1.0,zero=0.0; 1944 /* space to store constraints and their local indices */ 1945 PetscScalar *constraints_data; 1946 PetscInt *constraints_idxs,*constraints_idxs_B; 1947 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1948 PetscInt *constraints_n; 1949 /* iterators */ 1950 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1951 /* BLAS integers */ 1952 PetscBLASInt lwork,lierr; 1953 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1954 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1955 /* reuse */ 1956 PetscInt olocal_primal_size,olocal_primal_size_cc; 1957 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1958 /* change of basis */ 1959 PetscBool qr_needed; 1960 PetscBT change_basis,qr_needed_idx; 1961 /* auxiliary stuff */ 1962 PetscInt *nnz,*is_indices; 1963 PetscInt ncc; 1964 /* some quantities */ 1965 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1966 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1967 1968 PetscFunctionBegin; 1969 /* Destroy Mat objects computed previously */ 1970 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1971 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1972 /* save info on constraints from previous setup (if any) */ 1973 olocal_primal_size = pcbddc->local_primal_size; 1974 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1975 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1976 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1977 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1978 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1979 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1980 1981 /* print some info */ 1982 if (pcbddc->dbg_flag) { 1983 IS vertices; 1984 PetscInt nv,nedges,nfaces; 1985 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1986 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1987 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1988 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1993 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1994 } 1995 1996 if (!pcbddc->adaptive_selection) { 1997 IS ISForVertices,*ISForFaces,*ISForEdges; 1998 MatNullSpace nearnullsp; 1999 const Vec *nearnullvecs; 2000 Vec *localnearnullsp; 2001 PetscScalar *array; 2002 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2003 PetscBool nnsp_has_cnst; 2004 /* LAPACK working arrays for SVD or POD */ 2005 PetscBool skip_lapack,boolforchange; 2006 PetscScalar *work; 2007 PetscReal *singular_vals; 2008 #if defined(PETSC_USE_COMPLEX) 2009 PetscReal *rwork; 2010 #endif 2011 #if defined(PETSC_MISSING_LAPACK_GESVD) 2012 PetscScalar *temp_basis,*correlation_mat; 2013 #else 2014 PetscBLASInt dummy_int=1; 2015 PetscScalar dummy_scalar=1.; 2016 #endif 2017 2018 /* Get index sets for faces, edges and vertices from graph */ 2019 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2020 /* free unneeded index sets */ 2021 if (!pcbddc->use_vertices) { 2022 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2023 } 2024 if (!pcbddc->use_edges) { 2025 for (i=0;i<n_ISForEdges;i++) { 2026 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2027 } 2028 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2029 n_ISForEdges = 0; 2030 } 2031 if (!pcbddc->use_faces) { 2032 for (i=0;i<n_ISForFaces;i++) { 2033 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2034 } 2035 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2036 n_ISForFaces = 0; 2037 } 2038 2039 #if defined(PETSC_USE_DEBUG) 2040 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2041 Also use_change_of_basis should be consistent among processors */ 2042 if (pcbddc->NullSpace) { 2043 PetscBool tbool[2],gbool[2]; 2044 2045 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2046 pcbddc->use_change_of_basis = PETSC_TRUE; 2047 if (!ISForEdges) { 2048 pcbddc->use_change_on_faces = PETSC_TRUE; 2049 } 2050 } 2051 tbool[0] = pcbddc->use_change_of_basis; 2052 tbool[1] = pcbddc->use_change_on_faces; 2053 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2054 pcbddc->use_change_of_basis = gbool[0]; 2055 pcbddc->use_change_on_faces = gbool[1]; 2056 } 2057 #endif 2058 2059 /* check if near null space is attached to global mat */ 2060 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2061 if (nearnullsp) { 2062 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2063 /* remove any stored info */ 2064 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2065 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2066 /* store information for BDDC solver reuse */ 2067 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2068 pcbddc->onearnullspace = nearnullsp; 2069 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2070 for (i=0;i<nnsp_size;i++) { 2071 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2072 } 2073 } else { /* if near null space is not provided BDDC uses constants by default */ 2074 nnsp_size = 0; 2075 nnsp_has_cnst = PETSC_TRUE; 2076 } 2077 /* get max number of constraints on a single cc */ 2078 max_constraints = nnsp_size; 2079 if (nnsp_has_cnst) max_constraints++; 2080 2081 /* 2082 Evaluate maximum storage size needed by the procedure 2083 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2084 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2085 There can be multiple constraints per connected component 2086 */ 2087 n_vertices = 0; 2088 if (ISForVertices) { 2089 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2090 } 2091 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2092 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2093 2094 total_counts = n_ISForFaces+n_ISForEdges; 2095 total_counts *= max_constraints; 2096 total_counts += n_vertices; 2097 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2098 2099 total_counts = 0; 2100 max_size_of_constraint = 0; 2101 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2102 IS used_is; 2103 if (i<n_ISForEdges) { 2104 used_is = ISForEdges[i]; 2105 } else { 2106 used_is = ISForFaces[i-n_ISForEdges]; 2107 } 2108 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2109 total_counts += j; 2110 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2111 } 2112 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2113 2114 /* get local part of global near null space vectors */ 2115 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2116 for (k=0;k<nnsp_size;k++) { 2117 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2118 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2119 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2120 } 2121 2122 /* whether or not to skip lapack calls */ 2123 skip_lapack = PETSC_TRUE; 2124 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2125 2126 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2127 if (!skip_lapack) { 2128 PetscScalar temp_work; 2129 2130 #if defined(PETSC_MISSING_LAPACK_GESVD) 2131 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2132 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2133 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2134 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2135 #if defined(PETSC_USE_COMPLEX) 2136 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2137 #endif 2138 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2139 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2140 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2141 lwork = -1; 2142 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2143 #if !defined(PETSC_USE_COMPLEX) 2144 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2145 #else 2146 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2147 #endif 2148 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2149 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2150 #else /* on missing GESVD */ 2151 /* SVD */ 2152 PetscInt max_n,min_n; 2153 max_n = max_size_of_constraint; 2154 min_n = max_constraints; 2155 if (max_size_of_constraint < max_constraints) { 2156 min_n = max_size_of_constraint; 2157 max_n = max_constraints; 2158 } 2159 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2160 #if defined(PETSC_USE_COMPLEX) 2161 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2162 #endif 2163 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2164 lwork = -1; 2165 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2166 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2167 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2168 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2169 #if !defined(PETSC_USE_COMPLEX) 2170 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2171 #else 2172 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2173 #endif 2174 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2175 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2176 #endif /* on missing GESVD */ 2177 /* Allocate optimal workspace */ 2178 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2179 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2180 } 2181 /* Now we can loop on constraining sets */ 2182 total_counts = 0; 2183 constraints_idxs_ptr[0] = 0; 2184 constraints_data_ptr[0] = 0; 2185 /* vertices */ 2186 if (n_vertices) { 2187 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2188 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2189 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2190 for (i=0;i<n_vertices;i++) { 2191 constraints_n[total_counts] = 1; 2192 constraints_data[total_counts] = 1.0; 2193 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2194 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2195 total_counts++; 2196 } 2197 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2198 PetscBool used_vertex; 2199 for (i=0;i<n_vertices;i++) { 2200 used_vertex = PETSC_FALSE; 2201 k = 0; 2202 while (!used_vertex && k<nnsp_size) { 2203 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2204 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2205 constraints_n[total_counts] = 1; 2206 constraints_idxs[total_counts] = is_indices[i]; 2207 constraints_data[total_counts] = 1.0; 2208 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2209 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2210 total_counts++; 2211 used_vertex = PETSC_TRUE; 2212 } 2213 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2214 k++; 2215 } 2216 } 2217 } 2218 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2219 n_vertices = total_counts; 2220 } 2221 2222 /* edges and faces */ 2223 total_counts_cc = total_counts; 2224 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2225 IS used_is; 2226 PetscBool idxs_copied = PETSC_FALSE; 2227 2228 if (ncc<n_ISForEdges) { 2229 used_is = ISForEdges[ncc]; 2230 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2231 } else { 2232 used_is = ISForFaces[ncc-n_ISForEdges]; 2233 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2234 } 2235 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2236 2237 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2238 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2239 /* change of basis should not be performed on local periodic nodes */ 2240 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2241 if (nnsp_has_cnst) { 2242 PetscScalar quad_value; 2243 2244 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2245 idxs_copied = PETSC_TRUE; 2246 2247 if (!pcbddc->use_nnsp_true) { 2248 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2249 } else { 2250 quad_value = 1.0; 2251 } 2252 for (j=0;j<size_of_constraint;j++) { 2253 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2254 } 2255 temp_constraints++; 2256 total_counts++; 2257 } 2258 for (k=0;k<nnsp_size;k++) { 2259 PetscReal real_value; 2260 PetscScalar *ptr_to_data; 2261 2262 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2263 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2264 for (j=0;j<size_of_constraint;j++) { 2265 ptr_to_data[j] = array[is_indices[j]]; 2266 } 2267 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2268 /* check if array is null on the connected component */ 2269 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2270 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2271 if (real_value > 0.0) { /* keep indices and values */ 2272 temp_constraints++; 2273 total_counts++; 2274 if (!idxs_copied) { 2275 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2276 idxs_copied = PETSC_TRUE; 2277 } 2278 } 2279 } 2280 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2281 valid_constraints = temp_constraints; 2282 if (!pcbddc->use_nnsp_true && temp_constraints) { 2283 if (temp_constraints == 1) { /* just normalize the constraint */ 2284 PetscScalar norm,*ptr_to_data; 2285 2286 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2287 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2288 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2289 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2290 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2291 } else { /* perform SVD */ 2292 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2293 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2294 2295 #if defined(PETSC_MISSING_LAPACK_GESVD) 2296 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2297 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2298 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2299 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2300 from that computed using LAPACKgesvd 2301 -> This is due to a different computation of eigenvectors in LAPACKheev 2302 -> The quality of the POD-computed basis will be the same */ 2303 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2304 /* Store upper triangular part of correlation matrix */ 2305 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2306 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2307 for (j=0;j<temp_constraints;j++) { 2308 for (k=0;k<j+1;k++) { 2309 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2310 } 2311 } 2312 /* compute eigenvalues and eigenvectors of correlation matrix */ 2313 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2314 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2315 #if !defined(PETSC_USE_COMPLEX) 2316 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2317 #else 2318 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2319 #endif 2320 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2321 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2322 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2323 j = 0; 2324 while (j < temp_constraints && singular_vals[j] < tol) j++; 2325 total_counts = total_counts-j; 2326 valid_constraints = temp_constraints-j; 2327 /* scale and copy POD basis into used quadrature memory */ 2328 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2330 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2331 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2332 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2333 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2334 if (j<temp_constraints) { 2335 PetscInt ii; 2336 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2337 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2338 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2339 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2340 for (k=0;k<temp_constraints-j;k++) { 2341 for (ii=0;ii<size_of_constraint;ii++) { 2342 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2343 } 2344 } 2345 } 2346 #else /* on missing GESVD */ 2347 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2348 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2349 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2350 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2351 #if !defined(PETSC_USE_COMPLEX) 2352 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2353 #else 2354 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2355 #endif 2356 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2357 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2358 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2359 k = temp_constraints; 2360 if (k > size_of_constraint) k = size_of_constraint; 2361 j = 0; 2362 while (j < k && singular_vals[k-j-1] < tol) j++; 2363 valid_constraints = k-j; 2364 total_counts = total_counts-temp_constraints+valid_constraints; 2365 #endif /* on missing GESVD */ 2366 } 2367 } 2368 /* update pointers information */ 2369 if (valid_constraints) { 2370 constraints_n[total_counts_cc] = valid_constraints; 2371 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2372 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2373 /* set change_of_basis flag */ 2374 if (boolforchange) { 2375 PetscBTSet(change_basis,total_counts_cc); 2376 } 2377 total_counts_cc++; 2378 } 2379 } 2380 /* free workspace */ 2381 if (!skip_lapack) { 2382 ierr = PetscFree(work);CHKERRQ(ierr); 2383 #if defined(PETSC_USE_COMPLEX) 2384 ierr = PetscFree(rwork);CHKERRQ(ierr); 2385 #endif 2386 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2387 #if defined(PETSC_MISSING_LAPACK_GESVD) 2388 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2389 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2390 #endif 2391 } 2392 for (k=0;k<nnsp_size;k++) { 2393 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2394 } 2395 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2396 /* free index sets of faces, edges and vertices */ 2397 for (i=0;i<n_ISForFaces;i++) { 2398 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2399 } 2400 if (n_ISForFaces) { 2401 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2402 } 2403 for (i=0;i<n_ISForEdges;i++) { 2404 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2405 } 2406 if (n_ISForEdges) { 2407 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2408 } 2409 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2410 } else { 2411 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2412 2413 total_counts = 0; 2414 n_vertices = 0; 2415 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2416 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2417 } 2418 max_constraints = 0; 2419 total_counts_cc = 0; 2420 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2421 total_counts += pcbddc->adaptive_constraints_n[i]; 2422 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2423 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2424 } 2425 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2426 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2427 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2428 constraints_data = pcbddc->adaptive_constraints_data; 2429 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2430 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2431 total_counts_cc = 0; 2432 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2433 if (pcbddc->adaptive_constraints_n[i]) { 2434 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2435 } 2436 } 2437 #if 0 2438 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2439 for (i=0;i<total_counts_cc;i++) { 2440 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2441 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2442 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2443 printf(" %d",constraints_idxs[j]); 2444 } 2445 printf("\n"); 2446 printf("number of cc: %d\n",constraints_n[i]); 2447 } 2448 for (i=0;i<n_vertices;i++) { 2449 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2450 } 2451 for (i=0;i<sub_schurs->n_subs;i++) { 2452 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2453 } 2454 #endif 2455 2456 max_size_of_constraint = 0; 2457 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2458 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2459 /* Change of basis */ 2460 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2461 if (pcbddc->use_change_of_basis) { 2462 for (i=0;i<sub_schurs->n_subs;i++) { 2463 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2464 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2465 } 2466 } 2467 } 2468 } 2469 pcbddc->local_primal_size = total_counts; 2470 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2471 2472 /* map constraints_idxs in boundary numbering */ 2473 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2474 if (i != constraints_idxs_ptr[total_counts_cc]) { 2475 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2476 } 2477 2478 /* Create constraint matrix */ 2479 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2480 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2481 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2482 2483 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2484 /* determine if a QR strategy is needed for change of basis */ 2485 qr_needed = PETSC_FALSE; 2486 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2487 total_primal_vertices=0; 2488 pcbddc->local_primal_size_cc = 0; 2489 for (i=0;i<total_counts_cc;i++) { 2490 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2491 if (size_of_constraint == 1) { 2492 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2493 pcbddc->local_primal_size_cc += 1; 2494 } else if (PetscBTLookup(change_basis,i)) { 2495 for (k=0;k<constraints_n[i];k++) { 2496 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2497 } 2498 pcbddc->local_primal_size_cc += constraints_n[i]; 2499 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2500 PetscBTSet(qr_needed_idx,i); 2501 qr_needed = PETSC_TRUE; 2502 } 2503 } else { 2504 pcbddc->local_primal_size_cc += 1; 2505 } 2506 } 2507 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2508 pcbddc->n_vertices = total_primal_vertices; 2509 /* permute indices in order to have a sorted set of vertices */ 2510 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2511 2512 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2513 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2514 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2515 2516 /* nonzero structure of constraint matrix */ 2517 /* and get reference dof for local constraints */ 2518 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2519 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2520 2521 j = total_primal_vertices; 2522 total_counts = total_primal_vertices; 2523 cum = total_primal_vertices; 2524 for (i=n_vertices;i<total_counts_cc;i++) { 2525 if (!PetscBTLookup(change_basis,i)) { 2526 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2527 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2528 cum++; 2529 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2530 for (k=0;k<constraints_n[i];k++) { 2531 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2532 nnz[j+k] = size_of_constraint; 2533 } 2534 j += constraints_n[i]; 2535 } 2536 } 2537 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2538 ierr = PetscFree(nnz);CHKERRQ(ierr); 2539 2540 /* set values in constraint matrix */ 2541 for (i=0;i<total_primal_vertices;i++) { 2542 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2543 } 2544 total_counts = total_primal_vertices; 2545 for (i=n_vertices;i<total_counts_cc;i++) { 2546 if (!PetscBTLookup(change_basis,i)) { 2547 PetscInt *cols; 2548 2549 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2550 cols = constraints_idxs+constraints_idxs_ptr[i]; 2551 for (k=0;k<constraints_n[i];k++) { 2552 PetscInt row = total_counts+k; 2553 PetscScalar *vals; 2554 2555 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2556 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2557 } 2558 total_counts += constraints_n[i]; 2559 } 2560 } 2561 /* assembling */ 2562 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2563 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2564 2565 /* 2566 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2567 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2568 */ 2569 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2570 if (pcbddc->use_change_of_basis) { 2571 /* dual and primal dofs on a single cc */ 2572 PetscInt dual_dofs,primal_dofs; 2573 /* working stuff for GEQRF */ 2574 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2575 PetscBLASInt lqr_work; 2576 /* working stuff for UNGQR */ 2577 PetscScalar *gqr_work,lgqr_work_t; 2578 PetscBLASInt lgqr_work; 2579 /* working stuff for TRTRS */ 2580 PetscScalar *trs_rhs; 2581 PetscBLASInt Blas_NRHS; 2582 /* pointers for values insertion into change of basis matrix */ 2583 PetscInt *start_rows,*start_cols; 2584 PetscScalar *start_vals; 2585 /* working stuff for values insertion */ 2586 PetscBT is_primal; 2587 PetscInt *aux_primal_numbering_B; 2588 /* matrix sizes */ 2589 PetscInt global_size,local_size; 2590 /* temporary change of basis */ 2591 Mat localChangeOfBasisMatrix; 2592 /* extra space for debugging */ 2593 PetscScalar *dbg_work; 2594 2595 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2596 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2597 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2598 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2599 /* nonzeros for local mat */ 2600 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2601 for (i=0;i<pcis->n;i++) nnz[i]=1; 2602 for (i=n_vertices;i<total_counts_cc;i++) { 2603 if (PetscBTLookup(change_basis,i)) { 2604 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2605 if (PetscBTLookup(qr_needed_idx,i)) { 2606 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2607 } else { 2608 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2609 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2610 } 2611 } 2612 } 2613 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2614 ierr = PetscFree(nnz);CHKERRQ(ierr); 2615 /* Set initial identity in the matrix */ 2616 for (i=0;i<pcis->n;i++) { 2617 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2618 } 2619 2620 if (pcbddc->dbg_flag) { 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2623 } 2624 2625 2626 /* Now we loop on the constraints which need a change of basis */ 2627 /* 2628 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2629 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2630 2631 Basic blocks of change of basis matrix T computed by 2632 2633 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2634 2635 | 1 0 ... 0 s_1/S | 2636 | 0 1 ... 0 s_2/S | 2637 | ... | 2638 | 0 ... 1 s_{n-1}/S | 2639 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2640 2641 with S = \sum_{i=1}^n s_i^2 2642 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2643 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2644 2645 - QR decomposition of constraints otherwise 2646 */ 2647 if (qr_needed) { 2648 /* space to store Q */ 2649 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2650 /* first we issue queries for optimal work */ 2651 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2652 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2653 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2654 lqr_work = -1; 2655 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2656 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2657 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2658 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2659 lgqr_work = -1; 2660 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2661 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2662 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2663 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2664 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2665 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2666 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2667 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2668 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2669 /* array to store scaling factors for reflectors */ 2670 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2671 /* array to store rhs and solution of triangular solver */ 2672 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2673 /* allocating workspace for check */ 2674 if (pcbddc->dbg_flag) { 2675 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2676 } 2677 } 2678 /* array to store whether a node is primal or not */ 2679 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2680 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2681 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2682 if (i != total_primal_vertices) { 2683 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2684 } 2685 for (i=0;i<total_primal_vertices;i++) { 2686 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2687 } 2688 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2689 2690 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2691 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2692 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2693 if (PetscBTLookup(change_basis,total_counts)) { 2694 /* get constraint info */ 2695 primal_dofs = constraints_n[total_counts]; 2696 dual_dofs = size_of_constraint-primal_dofs; 2697 2698 if (pcbddc->dbg_flag) { 2699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2700 } 2701 2702 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2703 2704 /* copy quadrature constraints for change of basis check */ 2705 if (pcbddc->dbg_flag) { 2706 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2707 } 2708 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2709 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2710 2711 /* compute QR decomposition of constraints */ 2712 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2713 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2715 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2716 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2717 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2718 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2719 2720 /* explictly compute R^-T */ 2721 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2722 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2723 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2726 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2727 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2728 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2729 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2730 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2731 2732 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2733 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2734 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2735 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2736 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2737 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2738 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2739 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2740 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2741 2742 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2743 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2744 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2745 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2747 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2748 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2749 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2750 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2751 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2752 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2753 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2754 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2755 2756 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2757 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2758 /* insert cols for primal dofs */ 2759 for (j=0;j<primal_dofs;j++) { 2760 start_vals = &qr_basis[j*size_of_constraint]; 2761 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2762 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2763 } 2764 /* insert cols for dual dofs */ 2765 for (j=0,k=0;j<dual_dofs;k++) { 2766 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2767 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2768 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2769 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2770 j++; 2771 } 2772 } 2773 2774 /* check change of basis */ 2775 if (pcbddc->dbg_flag) { 2776 PetscInt ii,jj; 2777 PetscBool valid_qr=PETSC_TRUE; 2778 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2782 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2783 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2784 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2785 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2786 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2787 for (jj=0;jj<size_of_constraint;jj++) { 2788 for (ii=0;ii<primal_dofs;ii++) { 2789 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2790 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2791 } 2792 } 2793 if (!valid_qr) { 2794 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2795 for (jj=0;jj<size_of_constraint;jj++) { 2796 for (ii=0;ii<primal_dofs;ii++) { 2797 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2798 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2799 } 2800 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2801 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2802 } 2803 } 2804 } 2805 } else { 2806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2807 } 2808 } 2809 } else { /* simple transformation block */ 2810 PetscInt row,col; 2811 PetscScalar val,norm; 2812 2813 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2814 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2815 for (j=0;j<size_of_constraint;j++) { 2816 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2817 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2818 if (!PetscBTLookup(is_primal,row_B)) { 2819 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2820 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2821 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2822 } else { 2823 for (k=0;k<size_of_constraint;k++) { 2824 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2825 if (row != col) { 2826 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2827 } else { 2828 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2829 } 2830 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2831 } 2832 } 2833 } 2834 if (pcbddc->dbg_flag) { 2835 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2836 } 2837 } 2838 } else { 2839 if (pcbddc->dbg_flag) { 2840 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2841 } 2842 } 2843 } 2844 2845 /* free workspace */ 2846 if (qr_needed) { 2847 if (pcbddc->dbg_flag) { 2848 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2849 } 2850 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2851 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2852 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2853 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2854 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2855 } 2856 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2857 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2858 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2859 2860 /* assembling of global change of variable */ 2861 { 2862 Mat tmat; 2863 PetscInt bs; 2864 2865 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2866 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2867 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2868 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2869 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2870 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2871 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2872 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2873 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2874 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2875 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2876 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2877 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2878 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2879 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2880 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2881 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2882 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2883 } 2884 /* check */ 2885 if (pcbddc->dbg_flag) { 2886 PetscReal error; 2887 Vec x,x_change; 2888 2889 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2890 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2891 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2892 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2893 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2894 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2895 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2896 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2897 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2898 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2899 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2900 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2901 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2902 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2903 ierr = VecDestroy(&x);CHKERRQ(ierr); 2904 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2905 } 2906 2907 /* adapt sub_schurs computed (if any) */ 2908 if (pcbddc->use_deluxe_scaling) { 2909 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2910 if (sub_schurs->S_Ej_all) { 2911 Mat S_new,tmat; 2912 IS is_all_N; 2913 2914 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2915 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2916 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2917 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2918 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2919 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2920 sub_schurs->S_Ej_all = S_new; 2921 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2922 if (sub_schurs->sum_S_Ej_all) { 2923 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2924 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2925 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2926 sub_schurs->sum_S_Ej_all = S_new; 2927 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2928 } 2929 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2930 } 2931 } 2932 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2933 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2934 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2935 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2936 } 2937 2938 /* set up change of basis context */ 2939 if (pcbddc->ChangeOfBasisMatrix) { 2940 PCBDDCChange_ctx change_ctx; 2941 2942 if (!pcbddc->new_global_mat) { 2943 PetscInt global_size,local_size; 2944 2945 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2946 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2947 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2948 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2949 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2950 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2951 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2952 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2953 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2954 } else { 2955 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2956 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2957 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2958 } 2959 if (!pcbddc->user_ChangeOfBasisMatrix) { 2960 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2961 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2962 } else { 2963 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2964 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2965 } 2966 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2967 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2968 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2969 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2970 } 2971 2972 /* check if a new primal space has been introduced */ 2973 pcbddc->new_primal_space_local = PETSC_TRUE; 2974 if (olocal_primal_size == pcbddc->local_primal_size) { 2975 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2976 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2977 if (!pcbddc->new_primal_space_local) { 2978 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2979 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2980 } 2981 } 2982 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2983 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2984 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2985 2986 /* flush dbg viewer */ 2987 if (pcbddc->dbg_flag) { 2988 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2989 } 2990 2991 /* free workspace */ 2992 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2993 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2994 if (!pcbddc->adaptive_selection) { 2995 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2996 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2997 } else { 2998 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2999 pcbddc->adaptive_constraints_idxs_ptr, 3000 pcbddc->adaptive_constraints_data_ptr, 3001 pcbddc->adaptive_constraints_idxs, 3002 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3003 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3004 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3005 } 3006 PetscFunctionReturn(0); 3007 } 3008 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3011 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3012 { 3013 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3014 PC_IS *pcis = (PC_IS*)pc->data; 3015 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3016 PetscInt ierr,i,vertex_size,N; 3017 PetscViewer viewer=pcbddc->dbg_viewer; 3018 3019 PetscFunctionBegin; 3020 /* Reset previously computed graph */ 3021 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3022 /* Init local Graph struct */ 3023 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3024 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3025 3026 /* Check validity of the csr graph passed in by the user */ 3027 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3028 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3029 } 3030 3031 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3032 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3033 PetscInt *xadj,*adjncy; 3034 PetscInt nvtxs; 3035 PetscBool flg_row=PETSC_FALSE; 3036 3037 if (pcbddc->use_local_adj) { 3038 3039 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3040 if (flg_row) { 3041 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3042 pcbddc->computed_rowadj = PETSC_TRUE; 3043 } 3044 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3045 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3046 IS is_dummy; 3047 ISLocalToGlobalMapping l2gmap_dummy; 3048 PetscInt j,sum; 3049 PetscInt *cxadj,*cadjncy; 3050 const PetscInt *idxs; 3051 PCBDDCGraph graph; 3052 PetscBT is_on_boundary; 3053 3054 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3055 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3056 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3057 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3058 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3059 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3060 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3061 if (flg_row) { 3062 graph->xadj = xadj; 3063 graph->adjncy = adjncy; 3064 } 3065 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3066 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3067 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3068 3069 if (pcbddc->dbg_flag) { 3070 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3071 for (i=0;i<graph->ncc;i++) { 3072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3073 } 3074 } 3075 3076 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3077 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3078 for (i=0;i<pcis->n_B;i++) { 3079 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3080 } 3081 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3082 3083 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3084 sum = 0; 3085 for (i=0;i<graph->ncc;i++) { 3086 PetscInt sizecc = 0; 3087 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3088 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3089 sizecc++; 3090 } 3091 } 3092 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3093 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3094 cxadj[graph->queue[j]] = sizecc; 3095 } 3096 } 3097 sum += sizecc*sizecc; 3098 } 3099 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3100 sum = 0; 3101 for (i=0;i<pcis->n;i++) { 3102 PetscInt temp = cxadj[i]; 3103 cxadj[i] = sum; 3104 sum += temp; 3105 } 3106 cxadj[pcis->n] = sum; 3107 for (i=0;i<graph->ncc;i++) { 3108 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3109 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3110 PetscInt k,sizecc = 0; 3111 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3112 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3113 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3114 sizecc++; 3115 } 3116 } 3117 } 3118 } 3119 } 3120 if (sum) { 3121 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3122 } else { 3123 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3124 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3125 } 3126 graph->xadj = 0; 3127 graph->adjncy = 0; 3128 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3129 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3130 } 3131 } 3132 if (pcbddc->dbg_flag) { 3133 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3134 } 3135 3136 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3137 vertex_size = 1; 3138 if (pcbddc->user_provided_isfordofs) { 3139 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3140 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3141 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3142 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3143 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3144 } 3145 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3146 pcbddc->n_ISForDofs = 0; 3147 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3148 } 3149 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3150 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3151 } else { 3152 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3153 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3154 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3155 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3156 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3157 } 3158 } 3159 } 3160 3161 /* Setup of Graph */ 3162 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3163 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3164 } 3165 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3166 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3167 } 3168 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3169 3170 /* Graph's connected components analysis */ 3171 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3172 3173 /* print some info to stdout */ 3174 if (pcbddc->dbg_flag) { 3175 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3176 } 3177 3178 /* mark topography has done */ 3179 pcbddc->recompute_topography = PETSC_FALSE; 3180 PetscFunctionReturn(0); 3181 } 3182 3183 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "PCBDDCSubsetNumbering" 3186 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3187 { 3188 PetscSF sf; 3189 PetscLayout map; 3190 const PetscInt *idxs; 3191 PetscInt *leaf_data,*root_data,*gidxs; 3192 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3193 PetscInt n_n,nlocals,start,first_index; 3194 PetscMPIInt commsize; 3195 PetscBool first_found; 3196 PetscErrorCode ierr; 3197 3198 PetscFunctionBegin; 3199 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3200 if (subset_mult) { 3201 PetscCheckSameComm(subset,1,subset_mult,2); 3202 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3203 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3204 } 3205 /* create workspace layout for computing global indices of subset */ 3206 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3207 lbounds[0] = lbounds[1] = 0; 3208 for (i=0;i<n;i++) { 3209 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3210 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3211 } 3212 lbounds[0] = -lbounds[0]; 3213 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3214 gbounds[0] = -gbounds[0]; 3215 N = gbounds[1] - gbounds[0] + 1; 3216 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3217 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3218 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3219 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3220 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3221 3222 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3223 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3224 if (subset_mult) { 3225 const PetscInt* idxs_mult; 3226 3227 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3228 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3229 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3230 } else { 3231 for (i=0;i<n;i++) leaf_data[i] = 1; 3232 } 3233 /* local size of new subset */ 3234 n_n = 0; 3235 for (i=0;i<n;i++) n_n += leaf_data[i]; 3236 3237 /* global indexes in layout */ 3238 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3239 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3240 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3241 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3242 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3243 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3244 3245 /* reduce from leaves to roots */ 3246 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3247 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3248 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3249 3250 /* count indexes in local part of layout */ 3251 nlocals = 0; 3252 first_index = -1; 3253 first_found = PETSC_FALSE; 3254 for (i=0;i<Nl;i++) { 3255 if (!first_found && root_data[i]) { 3256 first_found = PETSC_TRUE; 3257 first_index = i; 3258 } 3259 nlocals += root_data[i]; 3260 } 3261 3262 /* cumulative of number of indexes and size of subset without holes */ 3263 #if defined(PETSC_HAVE_MPI_EXSCAN) 3264 start = 0; 3265 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3266 #else 3267 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3268 start = start-nlocals; 3269 #endif 3270 3271 if (N_n) { /* compute total size of new subset if requested */ 3272 *N_n = start + nlocals; 3273 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3274 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3275 } 3276 3277 /* adapt root data with cumulative */ 3278 if (first_found) { 3279 PetscInt old_index; 3280 3281 root_data[first_index] += start; 3282 old_index = first_index; 3283 for (i=first_index+1;i<Nl;i++) { 3284 if (root_data[i]) { 3285 root_data[i] += root_data[old_index]; 3286 old_index = i; 3287 } 3288 } 3289 } 3290 3291 /* from roots to leaves */ 3292 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3293 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3294 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3295 3296 /* create new IS with global indexes without holes */ 3297 if (subset_mult) { 3298 const PetscInt* idxs_mult; 3299 PetscInt cum; 3300 3301 cum = 0; 3302 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3303 for (i=0;i<n;i++) { 3304 PetscInt j; 3305 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3306 } 3307 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3308 } else { 3309 for (i=0;i<n;i++) { 3310 gidxs[i] = leaf_data[i]-1; 3311 } 3312 } 3313 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3314 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3315 PetscFunctionReturn(0); 3316 } 3317 3318 #undef __FUNCT__ 3319 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3320 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3321 { 3322 PetscInt i,j; 3323 PetscScalar *alphas; 3324 PetscErrorCode ierr; 3325 3326 PetscFunctionBegin; 3327 /* this implements stabilized Gram-Schmidt */ 3328 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3329 for (i=0;i<n;i++) { 3330 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3331 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3332 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3333 } 3334 ierr = PetscFree(alphas);CHKERRQ(ierr); 3335 PetscFunctionReturn(0); 3336 } 3337 3338 #undef __FUNCT__ 3339 #define __FUNCT__ "MatISGetSubassemblingPattern" 3340 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3341 { 3342 IS ranks_send_to; 3343 PetscInt n_neighs,*neighs,*n_shared,**shared; 3344 PetscMPIInt size,rank,color; 3345 PetscInt *xadj,*adjncy; 3346 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3347 PetscInt i,local_size,threshold=0; 3348 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3349 PetscSubcomm subcomm; 3350 PetscErrorCode ierr; 3351 3352 PetscFunctionBegin; 3353 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3354 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3355 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3356 3357 /* Get info on mapping */ 3358 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3359 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3360 3361 /* build local CSR graph of subdomains' connectivity */ 3362 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3363 xadj[0] = 0; 3364 xadj[1] = PetscMax(n_neighs-1,0); 3365 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3366 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3367 3368 if (threshold) { 3369 PetscInt xadj_count = 0; 3370 for (i=1;i<n_neighs;i++) { 3371 if (n_shared[i] > threshold) { 3372 adjncy[xadj_count] = neighs[i]; 3373 adjncy_wgt[xadj_count] = n_shared[i]; 3374 xadj_count++; 3375 } 3376 } 3377 xadj[1] = xadj_count; 3378 } else { 3379 if (xadj[1]) { 3380 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3381 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3382 } 3383 } 3384 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3385 if (use_square) { 3386 for (i=0;i<xadj[1];i++) { 3387 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3388 } 3389 } 3390 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3391 3392 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3393 3394 /* 3395 Restrict work on active processes only. 3396 */ 3397 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3398 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3399 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3400 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3401 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3402 if (color) { 3403 ierr = PetscFree(xadj);CHKERRQ(ierr); 3404 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3405 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3406 } else { 3407 Mat subdomain_adj; 3408 IS new_ranks,new_ranks_contig; 3409 MatPartitioning partitioner; 3410 PetscInt prank,rstart=0,rend=0; 3411 PetscInt *is_indices,*oldranks; 3412 PetscBool aggregate; 3413 3414 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3415 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3416 prank = rank; 3417 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3418 /* 3419 for (i=0;i<size;i++) { 3420 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3421 } 3422 */ 3423 for (i=0;i<xadj[1];i++) { 3424 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3425 } 3426 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3427 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3428 if (aggregate) { 3429 PetscInt lrows,row,ncols,*cols; 3430 PetscMPIInt nrank; 3431 PetscScalar *vals; 3432 3433 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3434 lrows = 0; 3435 if (nrank<redprocs) { 3436 lrows = size/redprocs; 3437 if (nrank<size%redprocs) lrows++; 3438 } 3439 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3440 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3441 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3442 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3443 row = nrank; 3444 ncols = xadj[1]-xadj[0]; 3445 cols = adjncy; 3446 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3447 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3448 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3449 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3450 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3451 ierr = PetscFree(xadj);CHKERRQ(ierr); 3452 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3453 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3454 ierr = PetscFree(vals);CHKERRQ(ierr); 3455 } else { 3456 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3457 } 3458 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3459 3460 /* Partition */ 3461 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3462 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3463 if (use_vwgt) { 3464 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3465 v_wgt[0] = local_size; 3466 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3467 } 3468 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3469 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3470 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3471 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3472 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3473 3474 /* renumber new_ranks to avoid "holes" in new set of processors */ 3475 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3476 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3477 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3478 if (!redprocs) { 3479 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3480 } else { 3481 PetscInt idxs[1]; 3482 PetscMPIInt tag; 3483 MPI_Request *reqs; 3484 3485 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3486 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3487 for (i=rstart;i<rend;i++) { 3488 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3489 } 3490 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3491 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3492 ierr = PetscFree(reqs);CHKERRQ(ierr); 3493 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3494 } 3495 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3496 /* clean up */ 3497 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3498 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3499 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3500 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3501 } 3502 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3503 3504 /* assemble parallel IS for sends */ 3505 i = 1; 3506 if (color) i=0; 3507 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3508 /* get back IS */ 3509 *is_sends = ranks_send_to; 3510 PetscFunctionReturn(0); 3511 } 3512 3513 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3514 3515 #undef __FUNCT__ 3516 #define __FUNCT__ "MatISSubassemble" 3517 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3518 { 3519 Mat local_mat; 3520 Mat_IS *matis; 3521 IS is_sends_internal; 3522 PetscInt rows,cols,new_local_rows; 3523 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3524 PetscBool ismatis,isdense,newisdense,destroy_mat; 3525 ISLocalToGlobalMapping l2gmap; 3526 PetscInt* l2gmap_indices; 3527 const PetscInt* is_indices; 3528 MatType new_local_type; 3529 /* buffers */ 3530 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3531 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3532 PetscInt *recv_buffer_idxs_local; 3533 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3534 /* MPI */ 3535 MPI_Comm comm,comm_n; 3536 PetscSubcomm subcomm; 3537 PetscMPIInt n_sends,n_recvs,commsize; 3538 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3539 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3540 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3541 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3542 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3543 PetscErrorCode ierr; 3544 3545 PetscFunctionBegin; 3546 /* TODO: add missing checks */ 3547 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3548 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3549 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3550 PetscValidLogicalCollectiveInt(mat,nis,7); 3551 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3552 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3553 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3554 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3555 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3556 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3557 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3558 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3559 PetscInt mrows,mcols,mnrows,mncols; 3560 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3561 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3562 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3563 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3564 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3565 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3566 } 3567 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3568 PetscValidLogicalCollectiveInt(mat,bs,0); 3569 /* prepare IS for sending if not provided */ 3570 if (!is_sends) { 3571 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3572 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3573 } else { 3574 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3575 is_sends_internal = is_sends; 3576 } 3577 3578 /* get pointer of MATIS data */ 3579 matis = (Mat_IS*)mat->data; 3580 3581 /* get comm */ 3582 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3583 3584 /* compute number of sends */ 3585 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3586 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3587 3588 /* compute number of receives */ 3589 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3590 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3591 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3592 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3593 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3594 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3595 ierr = PetscFree(iflags);CHKERRQ(ierr); 3596 3597 /* restrict comm if requested */ 3598 subcomm = 0; 3599 destroy_mat = PETSC_FALSE; 3600 if (restrict_comm) { 3601 PetscMPIInt color,subcommsize; 3602 3603 color = 0; 3604 if (restrict_full) { 3605 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3606 } else { 3607 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3608 } 3609 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3610 subcommsize = commsize - subcommsize; 3611 /* check if reuse has been requested */ 3612 if (reuse == MAT_REUSE_MATRIX) { 3613 if (*mat_n) { 3614 PetscMPIInt subcommsize2; 3615 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3616 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3617 comm_n = PetscObjectComm((PetscObject)*mat_n); 3618 } else { 3619 comm_n = PETSC_COMM_SELF; 3620 } 3621 } else { /* MAT_INITIAL_MATRIX */ 3622 PetscMPIInt rank; 3623 3624 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3625 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3626 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3627 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3628 comm_n = PetscSubcommChild(subcomm); 3629 } 3630 /* flag to destroy *mat_n if not significative */ 3631 if (color) destroy_mat = PETSC_TRUE; 3632 } else { 3633 comm_n = comm; 3634 } 3635 3636 /* prepare send/receive buffers */ 3637 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3638 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3639 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3640 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3641 if (nis) { 3642 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3643 } 3644 3645 /* Get data from local matrices */ 3646 if (!isdense) { 3647 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3648 /* TODO: See below some guidelines on how to prepare the local buffers */ 3649 /* 3650 send_buffer_vals should contain the raw values of the local matrix 3651 send_buffer_idxs should contain: 3652 - MatType_PRIVATE type 3653 - PetscInt size_of_l2gmap 3654 - PetscInt global_row_indices[size_of_l2gmap] 3655 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3656 */ 3657 } else { 3658 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3659 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3660 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3661 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3662 send_buffer_idxs[1] = i; 3663 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3664 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3665 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3666 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3667 for (i=0;i<n_sends;i++) { 3668 ilengths_vals[is_indices[i]] = len*len; 3669 ilengths_idxs[is_indices[i]] = len+2; 3670 } 3671 } 3672 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3673 /* additional is (if any) */ 3674 if (nis) { 3675 PetscMPIInt psum; 3676 PetscInt j; 3677 for (j=0,psum=0;j<nis;j++) { 3678 PetscInt plen; 3679 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3680 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3681 psum += len+1; /* indices + lenght */ 3682 } 3683 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3684 for (j=0,psum=0;j<nis;j++) { 3685 PetscInt plen; 3686 const PetscInt *is_array_idxs; 3687 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3688 send_buffer_idxs_is[psum] = plen; 3689 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3690 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3691 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3692 psum += plen+1; /* indices + lenght */ 3693 } 3694 for (i=0;i<n_sends;i++) { 3695 ilengths_idxs_is[is_indices[i]] = psum; 3696 } 3697 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3698 } 3699 3700 buf_size_idxs = 0; 3701 buf_size_vals = 0; 3702 buf_size_idxs_is = 0; 3703 for (i=0;i<n_recvs;i++) { 3704 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3705 buf_size_vals += (PetscInt)olengths_vals[i]; 3706 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3707 } 3708 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3709 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3710 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3711 3712 /* get new tags for clean communications */ 3713 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3714 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3715 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3716 3717 /* allocate for requests */ 3718 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3719 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3720 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3721 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3722 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3723 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3724 3725 /* communications */ 3726 ptr_idxs = recv_buffer_idxs; 3727 ptr_vals = recv_buffer_vals; 3728 ptr_idxs_is = recv_buffer_idxs_is; 3729 for (i=0;i<n_recvs;i++) { 3730 source_dest = onodes[i]; 3731 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3732 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3733 ptr_idxs += olengths_idxs[i]; 3734 ptr_vals += olengths_vals[i]; 3735 if (nis) { 3736 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3737 ptr_idxs_is += olengths_idxs_is[i]; 3738 } 3739 } 3740 for (i=0;i<n_sends;i++) { 3741 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3742 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3743 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3744 if (nis) { 3745 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3746 } 3747 } 3748 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3749 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3750 3751 /* assemble new l2g map */ 3752 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3753 ptr_idxs = recv_buffer_idxs; 3754 new_local_rows = 0; 3755 for (i=0;i<n_recvs;i++) { 3756 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3757 ptr_idxs += olengths_idxs[i]; 3758 } 3759 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3760 ptr_idxs = recv_buffer_idxs; 3761 new_local_rows = 0; 3762 for (i=0;i<n_recvs;i++) { 3763 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3764 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3765 ptr_idxs += olengths_idxs[i]; 3766 } 3767 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3768 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3769 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3770 3771 /* infer new local matrix type from received local matrices type */ 3772 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3773 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3774 if (n_recvs) { 3775 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3776 ptr_idxs = recv_buffer_idxs; 3777 for (i=0;i<n_recvs;i++) { 3778 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3779 new_local_type_private = MATAIJ_PRIVATE; 3780 break; 3781 } 3782 ptr_idxs += olengths_idxs[i]; 3783 } 3784 switch (new_local_type_private) { 3785 case MATDENSE_PRIVATE: 3786 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3787 new_local_type = MATSEQAIJ; 3788 bs = 1; 3789 } else { /* if I receive only 1 dense matrix */ 3790 new_local_type = MATSEQDENSE; 3791 bs = 1; 3792 } 3793 break; 3794 case MATAIJ_PRIVATE: 3795 new_local_type = MATSEQAIJ; 3796 bs = 1; 3797 break; 3798 case MATBAIJ_PRIVATE: 3799 new_local_type = MATSEQBAIJ; 3800 break; 3801 case MATSBAIJ_PRIVATE: 3802 new_local_type = MATSEQSBAIJ; 3803 break; 3804 default: 3805 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3806 break; 3807 } 3808 } else { /* by default, new_local_type is seqdense */ 3809 new_local_type = MATSEQDENSE; 3810 bs = 1; 3811 } 3812 3813 /* create MATIS object if needed */ 3814 if (reuse == MAT_INITIAL_MATRIX) { 3815 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3816 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3817 } else { 3818 /* it also destroys the local matrices */ 3819 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3820 } 3821 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3822 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3823 3824 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3825 3826 /* Global to local map of received indices */ 3827 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3828 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3829 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3830 3831 /* restore attributes -> type of incoming data and its size */ 3832 buf_size_idxs = 0; 3833 for (i=0;i<n_recvs;i++) { 3834 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3835 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3836 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3837 } 3838 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3839 3840 /* set preallocation */ 3841 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3842 if (!newisdense) { 3843 PetscInt *new_local_nnz=0; 3844 3845 ptr_vals = recv_buffer_vals; 3846 ptr_idxs = recv_buffer_idxs_local; 3847 if (n_recvs) { 3848 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3849 } 3850 for (i=0;i<n_recvs;i++) { 3851 PetscInt j; 3852 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3853 for (j=0;j<*(ptr_idxs+1);j++) { 3854 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3855 } 3856 } else { 3857 /* TODO */ 3858 } 3859 ptr_idxs += olengths_idxs[i]; 3860 } 3861 if (new_local_nnz) { 3862 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3863 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3864 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3865 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3866 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3867 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3868 } else { 3869 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3870 } 3871 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3872 } else { 3873 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3874 } 3875 3876 /* set values */ 3877 ptr_vals = recv_buffer_vals; 3878 ptr_idxs = recv_buffer_idxs_local; 3879 for (i=0;i<n_recvs;i++) { 3880 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3881 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3882 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3883 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3884 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3885 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3886 } else { 3887 /* TODO */ 3888 } 3889 ptr_idxs += olengths_idxs[i]; 3890 ptr_vals += olengths_vals[i]; 3891 } 3892 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3894 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3895 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3896 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3897 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3898 3899 #if 0 3900 if (!restrict_comm) { /* check */ 3901 Vec lvec,rvec; 3902 PetscReal infty_error; 3903 3904 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3905 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3906 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3907 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3908 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3909 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3910 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3911 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3912 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3913 } 3914 #endif 3915 3916 /* assemble new additional is (if any) */ 3917 if (nis) { 3918 PetscInt **temp_idxs,*count_is,j,psum; 3919 3920 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3921 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3922 ptr_idxs = recv_buffer_idxs_is; 3923 psum = 0; 3924 for (i=0;i<n_recvs;i++) { 3925 for (j=0;j<nis;j++) { 3926 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3927 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3928 psum += plen; 3929 ptr_idxs += plen+1; /* shift pointer to received data */ 3930 } 3931 } 3932 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3933 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3934 for (i=1;i<nis;i++) { 3935 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3936 } 3937 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3938 ptr_idxs = recv_buffer_idxs_is; 3939 for (i=0;i<n_recvs;i++) { 3940 for (j=0;j<nis;j++) { 3941 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3942 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3943 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3944 ptr_idxs += plen+1; /* shift pointer to received data */ 3945 } 3946 } 3947 for (i=0;i<nis;i++) { 3948 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3949 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3950 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3951 } 3952 ierr = PetscFree(count_is);CHKERRQ(ierr); 3953 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3954 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3955 } 3956 /* free workspace */ 3957 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3958 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3959 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3960 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3961 if (isdense) { 3962 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3963 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3964 } else { 3965 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3966 } 3967 if (nis) { 3968 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3969 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3970 } 3971 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3972 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3973 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3974 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3975 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3976 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3977 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3978 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3979 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3980 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3981 ierr = PetscFree(onodes);CHKERRQ(ierr); 3982 if (nis) { 3983 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3984 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3985 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3986 } 3987 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3988 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3989 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3990 for (i=0;i<nis;i++) { 3991 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3992 } 3993 *mat_n = NULL; 3994 } 3995 PetscFunctionReturn(0); 3996 } 3997 3998 /* temporary hack into ksp private data structure */ 3999 #include <petsc/private/kspimpl.h> 4000 4001 #undef __FUNCT__ 4002 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4003 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4004 { 4005 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4006 PC_IS *pcis = (PC_IS*)pc->data; 4007 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4008 MatNullSpace CoarseNullSpace=NULL; 4009 ISLocalToGlobalMapping coarse_islg; 4010 IS coarse_is,*isarray; 4011 PetscInt i,im_active=-1,active_procs=-1; 4012 PetscInt nis,nisdofs,nisneu; 4013 PC pc_temp; 4014 PCType coarse_pc_type; 4015 KSPType coarse_ksp_type; 4016 PetscBool multilevel_requested,multilevel_allowed; 4017 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4018 Mat t_coarse_mat_is; 4019 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4020 PetscMPIInt all_procs; 4021 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4022 PetscBool compute_vecs = PETSC_FALSE; 4023 PetscScalar *array; 4024 PetscErrorCode ierr; 4025 4026 PetscFunctionBegin; 4027 /* Assign global numbering to coarse dofs */ 4028 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4029 PetscInt ocoarse_size; 4030 compute_vecs = PETSC_TRUE; 4031 ocoarse_size = pcbddc->coarse_size; 4032 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4033 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4034 /* see if we can avoid some work */ 4035 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4036 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4037 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4038 PC pc; 4039 PetscBool isbddc; 4040 4041 /* temporary workaround since PCBDDC does not have a reset method so far */ 4042 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4043 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4044 if (isbddc) { 4045 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4046 } 4047 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4048 coarse_reuse = PETSC_FALSE; 4049 } else { /* we can safely reuse already computed coarse matrix */ 4050 coarse_reuse = PETSC_TRUE; 4051 } 4052 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4053 coarse_reuse = PETSC_FALSE; 4054 } 4055 /* reset any subassembling information */ 4056 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4057 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4058 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4059 coarse_reuse = PETSC_TRUE; 4060 } 4061 4062 /* count "active" (i.e. with positive local size) and "void" processes */ 4063 im_active = !!(pcis->n); 4064 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4065 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4066 void_procs = all_procs-active_procs; 4067 csin_type_simple = PETSC_TRUE; 4068 redist = PETSC_FALSE; 4069 if (pcbddc->current_level && void_procs) { 4070 csin_ml = PETSC_TRUE; 4071 ncoarse_ml = void_procs; 4072 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4073 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4074 csin_ds = PETSC_TRUE; 4075 ncoarse_ds = pcbddc->redistribute_coarse; 4076 redist = PETSC_TRUE; 4077 } else { 4078 csin_ds = PETSC_TRUE; 4079 ncoarse_ds = active_procs; 4080 redist = PETSC_TRUE; 4081 } 4082 } else { 4083 csin_ml = PETSC_FALSE; 4084 ncoarse_ml = all_procs; 4085 if (void_procs) { 4086 csin_ds = PETSC_TRUE; 4087 ncoarse_ds = void_procs; 4088 csin_type_simple = PETSC_FALSE; 4089 } else { 4090 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4091 csin_ds = PETSC_TRUE; 4092 ncoarse_ds = pcbddc->redistribute_coarse; 4093 redist = PETSC_TRUE; 4094 } else { 4095 csin_ds = PETSC_FALSE; 4096 ncoarse_ds = all_procs; 4097 } 4098 } 4099 } 4100 4101 /* 4102 test if we can go multilevel: three conditions must be satisfied: 4103 - we have not exceeded the number of levels requested 4104 - we can actually subassemble the active processes 4105 - we can find a suitable number of MPI processes where we can place the subassembled problem 4106 */ 4107 multilevel_allowed = PETSC_FALSE; 4108 multilevel_requested = PETSC_FALSE; 4109 if (pcbddc->current_level < pcbddc->max_levels) { 4110 multilevel_requested = PETSC_TRUE; 4111 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4112 multilevel_allowed = PETSC_FALSE; 4113 } else { 4114 multilevel_allowed = PETSC_TRUE; 4115 } 4116 } 4117 /* determine number of process partecipating to coarse solver */ 4118 if (multilevel_allowed) { 4119 ncoarse = ncoarse_ml; 4120 csin = csin_ml; 4121 redist = PETSC_FALSE; 4122 } else { 4123 ncoarse = ncoarse_ds; 4124 csin = csin_ds; 4125 } 4126 4127 /* creates temporary l2gmap and IS for coarse indexes */ 4128 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4129 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4130 4131 /* creates temporary MATIS object for coarse matrix */ 4132 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4133 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4134 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4135 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4136 #if 0 4137 { 4138 PetscViewer viewer; 4139 char filename[256]; 4140 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4141 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4142 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4143 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4144 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4145 } 4146 #endif 4147 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4148 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4149 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4150 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4151 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4152 4153 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4154 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4155 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4156 const PetscInt *idxs; 4157 ISLocalToGlobalMapping tmap; 4158 4159 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4160 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4161 /* allocate space for temporary storage */ 4162 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4163 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4164 /* allocate for IS array */ 4165 nisdofs = pcbddc->n_ISForDofsLocal; 4166 nisneu = !!pcbddc->NeumannBoundariesLocal; 4167 nis = nisdofs + nisneu; 4168 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4169 /* dofs splitting */ 4170 for (i=0;i<nisdofs;i++) { 4171 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4172 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4173 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4174 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4175 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4176 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4177 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4178 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4179 } 4180 /* neumann boundaries */ 4181 if (pcbddc->NeumannBoundariesLocal) { 4182 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4183 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4184 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4185 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4186 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4187 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4188 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4189 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4190 } 4191 /* free memory */ 4192 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4193 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4194 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4195 } else { 4196 nis = 0; 4197 nisdofs = 0; 4198 nisneu = 0; 4199 isarray = NULL; 4200 } 4201 /* destroy no longer needed map */ 4202 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4203 4204 /* restrict on coarse candidates (if needed) */ 4205 coarse_mat_is = NULL; 4206 if (csin) { 4207 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4208 if (redist) { 4209 PetscMPIInt rank; 4210 PetscInt spc,n_spc_p1,dest[1],destsize; 4211 4212 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4213 spc = active_procs/ncoarse; 4214 n_spc_p1 = active_procs%ncoarse; 4215 if (im_active) { 4216 destsize = 1; 4217 if (rank > n_spc_p1*(spc+1)-1) { 4218 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4219 } else { 4220 dest[0] = rank/(spc+1); 4221 } 4222 } else { 4223 destsize = 0; 4224 } 4225 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4226 } else if (csin_type_simple) { 4227 PetscMPIInt rank; 4228 PetscInt issize,isidx; 4229 4230 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4231 if (im_active) { 4232 issize = 1; 4233 isidx = (PetscInt)rank; 4234 } else { 4235 issize = 0; 4236 isidx = -1; 4237 } 4238 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4239 } else { /* get a suitable subassembling pattern from MATIS code */ 4240 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4241 } 4242 4243 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4244 if (!redist || ncoarse <= void_procs) { 4245 PetscInt ncoarse_cand,tissize,*nisindices; 4246 PetscInt *coarse_candidates; 4247 const PetscInt* tisindices; 4248 4249 /* get coarse candidates' ranks in pc communicator */ 4250 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4251 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4252 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4253 if (!coarse_candidates[i]) { 4254 coarse_candidates[ncoarse_cand++]=i; 4255 } 4256 } 4257 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4258 4259 4260 if (pcbddc->dbg_flag) { 4261 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4262 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4263 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4264 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4265 for (i=0;i<ncoarse_cand;i++) { 4266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4267 } 4268 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4269 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4270 } 4271 /* shift the pattern on coarse candidates */ 4272 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4273 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4274 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4275 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4276 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4277 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4278 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4279 } 4280 if (pcbddc->dbg_flag) { 4281 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4282 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4283 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4285 } 4286 } 4287 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4288 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4289 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4290 } else { /* this is the last level, so use just receiving processes in subcomm */ 4291 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4292 } 4293 } else { 4294 if (pcbddc->dbg_flag) { 4295 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4296 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4297 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4298 } 4299 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4300 coarse_mat_is = t_coarse_mat_is; 4301 } 4302 4303 /* create local to global scatters for coarse problem */ 4304 if (compute_vecs) { 4305 PetscInt lrows; 4306 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4307 if (coarse_mat_is) { 4308 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4309 } else { 4310 lrows = 0; 4311 } 4312 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4313 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4314 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4315 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4316 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4317 } 4318 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4319 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4320 4321 /* set defaults for coarse KSP and PC */ 4322 if (multilevel_allowed) { 4323 coarse_ksp_type = KSPRICHARDSON; 4324 coarse_pc_type = PCBDDC; 4325 } else { 4326 coarse_ksp_type = KSPPREONLY; 4327 coarse_pc_type = PCREDUNDANT; 4328 } 4329 4330 /* print some info if requested */ 4331 if (pcbddc->dbg_flag) { 4332 if (!multilevel_allowed) { 4333 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4334 if (multilevel_requested) { 4335 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4336 } else if (pcbddc->max_levels) { 4337 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4338 } 4339 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4340 } 4341 } 4342 4343 /* create the coarse KSP object only once with defaults */ 4344 if (coarse_mat_is) { 4345 MatReuse coarse_mat_reuse; 4346 PetscViewer dbg_viewer = NULL; 4347 if (pcbddc->dbg_flag) { 4348 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4349 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4350 } 4351 if (!pcbddc->coarse_ksp) { 4352 char prefix[256],str_level[16]; 4353 size_t len; 4354 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4355 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4356 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4357 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4358 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4359 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4360 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4361 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4362 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4363 /* prefix */ 4364 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4365 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4366 if (!pcbddc->current_level) { 4367 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4368 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4369 } else { 4370 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4371 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4372 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4373 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4374 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4375 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4376 } 4377 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4378 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4379 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4380 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4381 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4382 /* allow user customization */ 4383 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4384 } 4385 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4386 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4387 if (nisdofs) { 4388 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4389 for (i=0;i<nisdofs;i++) { 4390 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4391 } 4392 } 4393 if (nisneu) { 4394 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4395 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4396 } 4397 4398 /* get some info after set from options */ 4399 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4400 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4401 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4402 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4403 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4404 isbddc = PETSC_FALSE; 4405 } 4406 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4407 if (isredundant) { 4408 KSP inner_ksp; 4409 PC inner_pc; 4410 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4411 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4412 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4413 } 4414 4415 /* assemble coarse matrix */ 4416 if (coarse_reuse) { 4417 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4418 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4419 coarse_mat_reuse = MAT_REUSE_MATRIX; 4420 } else { 4421 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4422 } 4423 if (isbddc || isnn) { 4424 if (pcbddc->coarsening_ratio > 1) { 4425 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4426 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4427 if (pcbddc->dbg_flag) { 4428 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4429 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4430 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4431 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4432 } 4433 } 4434 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4435 } else { 4436 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4437 coarse_mat = coarse_mat_is; 4438 } 4439 } else { 4440 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4441 } 4442 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4443 4444 /* propagate symmetry info of coarse matrix */ 4445 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4446 if (pc->pmat->symmetric_set) { 4447 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4448 } 4449 if (pc->pmat->hermitian_set) { 4450 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4451 } 4452 if (pc->pmat->spd_set) { 4453 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4454 } 4455 /* set operators */ 4456 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4457 if (pcbddc->dbg_flag) { 4458 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4459 } 4460 } else { /* processes non partecipating to coarse solver (if any) */ 4461 coarse_mat = 0; 4462 } 4463 ierr = PetscFree(isarray);CHKERRQ(ierr); 4464 #if 0 4465 { 4466 PetscViewer viewer; 4467 char filename[256]; 4468 sprintf(filename,"coarse_mat.m"); 4469 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4470 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4471 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4472 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4473 } 4474 #endif 4475 4476 /* Compute coarse null space (special handling by BDDC only) */ 4477 #if 0 4478 if (pcbddc->NullSpace) { 4479 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4480 } 4481 #endif 4482 4483 if (pcbddc->coarse_ksp) { 4484 Vec crhs,csol; 4485 PetscBool ispreonly; 4486 4487 if (CoarseNullSpace) { 4488 if (isbddc) { 4489 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4490 } else { 4491 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4492 } 4493 } 4494 /* setup coarse ksp */ 4495 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4496 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4497 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4498 /* hack */ 4499 if (!csol) { 4500 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4501 } 4502 if (!crhs) { 4503 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4504 } 4505 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4506 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4507 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4508 KSP check_ksp; 4509 KSPType check_ksp_type; 4510 PC check_pc; 4511 Vec check_vec,coarse_vec; 4512 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4513 PetscInt its; 4514 PetscBool compute_eigs; 4515 PetscReal *eigs_r,*eigs_c; 4516 PetscInt neigs; 4517 const char *prefix; 4518 4519 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4520 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4521 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4522 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4523 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4524 if (ispreonly) { 4525 check_ksp_type = KSPPREONLY; 4526 compute_eigs = PETSC_FALSE; 4527 } else { 4528 check_ksp_type = KSPGMRES; 4529 compute_eigs = PETSC_TRUE; 4530 } 4531 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4532 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4533 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4534 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4535 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4536 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4537 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4538 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4539 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4540 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4541 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4542 /* create random vec */ 4543 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4544 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4545 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4546 if (CoarseNullSpace) { 4547 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4548 } 4549 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4550 /* solve coarse problem */ 4551 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4552 if (CoarseNullSpace) { 4553 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4554 } 4555 /* set eigenvalue estimation if preonly has not been requested */ 4556 if (compute_eigs) { 4557 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4558 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4559 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4560 lambda_max = eigs_r[neigs-1]; 4561 lambda_min = eigs_r[0]; 4562 if (pcbddc->use_coarse_estimates) { 4563 if (lambda_max>lambda_min) { 4564 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4565 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4566 } 4567 } 4568 } 4569 4570 /* check coarse problem residual error */ 4571 if (pcbddc->dbg_flag) { 4572 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4573 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4574 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4575 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4576 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4577 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4578 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4579 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4580 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4581 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4582 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4583 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4584 if (compute_eigs) { 4585 PetscReal lambda_max_s,lambda_min_s; 4586 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4587 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4588 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4589 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4590 for (i=0;i<neigs;i++) { 4591 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4592 } 4593 } 4594 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4595 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4596 } 4597 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4598 if (compute_eigs) { 4599 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4600 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4601 } 4602 } 4603 } 4604 /* print additional info */ 4605 if (pcbddc->dbg_flag) { 4606 /* waits until all processes reaches this point */ 4607 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4608 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4609 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4610 } 4611 4612 /* free memory */ 4613 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4614 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4615 PetscFunctionReturn(0); 4616 } 4617 4618 #undef __FUNCT__ 4619 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4620 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4621 { 4622 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4623 PC_IS* pcis = (PC_IS*)pc->data; 4624 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4625 IS subset,subset_mult,subset_n; 4626 PetscInt local_size,coarse_size=0; 4627 PetscInt *local_primal_indices=NULL; 4628 const PetscInt *t_local_primal_indices; 4629 PetscErrorCode ierr; 4630 4631 PetscFunctionBegin; 4632 /* Compute global number of coarse dofs */ 4633 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4634 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4635 } 4636 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4637 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4638 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4639 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4640 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4641 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4642 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4643 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4644 if (local_size != pcbddc->local_primal_size) { 4645 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4646 } 4647 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4648 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4649 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4650 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4651 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4652 4653 /* check numbering */ 4654 if (pcbddc->dbg_flag) { 4655 PetscScalar coarsesum,*array; 4656 PetscInt i; 4657 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4658 4659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4660 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4661 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4662 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4663 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4664 for (i=0;i<pcbddc->local_primal_size;i++) { 4665 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4666 } 4667 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4668 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4669 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4670 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4671 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4672 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4673 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4674 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4675 for (i=0;i<pcis->n;i++) { 4676 if (array[i] == 1.0) { 4677 set_error = PETSC_TRUE; 4678 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4679 } 4680 } 4681 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4682 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4683 for (i=0;i<pcis->n;i++) { 4684 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4685 } 4686 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4687 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4688 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4689 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4690 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4691 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4692 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4693 PetscInt *gidxs; 4694 4695 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4696 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4697 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4698 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4700 for (i=0;i<pcbddc->local_primal_size;i++) { 4701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4702 } 4703 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4704 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4705 } 4706 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4707 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4708 } 4709 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4710 /* get back data */ 4711 *coarse_size_n = coarse_size; 4712 *local_primal_indices_n = local_primal_indices; 4713 PetscFunctionReturn(0); 4714 } 4715 4716 #undef __FUNCT__ 4717 #define __FUNCT__ "PCBDDCGlobalToLocal" 4718 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4719 { 4720 IS localis_t; 4721 PetscInt i,lsize,*idxs,n; 4722 PetscScalar *vals; 4723 PetscErrorCode ierr; 4724 4725 PetscFunctionBegin; 4726 /* get indices in local ordering exploiting local to global map */ 4727 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4728 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4729 for (i=0;i<lsize;i++) vals[i] = 1.0; 4730 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4731 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4732 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4733 if (idxs) { /* multilevel guard */ 4734 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4735 } 4736 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4737 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4738 ierr = PetscFree(vals);CHKERRQ(ierr); 4739 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4740 /* now compute set in local ordering */ 4741 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4742 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4743 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4744 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4745 for (i=0,lsize=0;i<n;i++) { 4746 if (PetscRealPart(vals[i]) > 0.5) { 4747 lsize++; 4748 } 4749 } 4750 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4751 for (i=0,lsize=0;i<n;i++) { 4752 if (PetscRealPart(vals[i]) > 0.5) { 4753 idxs[lsize++] = i; 4754 } 4755 } 4756 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4757 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4758 *localis = localis_t; 4759 PetscFunctionReturn(0); 4760 } 4761 4762 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4763 #undef __FUNCT__ 4764 #define __FUNCT__ "PCBDDCMatMult_Private" 4765 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4766 { 4767 PCBDDCChange_ctx change_ctx; 4768 PetscErrorCode ierr; 4769 4770 PetscFunctionBegin; 4771 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4772 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4773 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4774 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4775 PetscFunctionReturn(0); 4776 } 4777 4778 #undef __FUNCT__ 4779 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4780 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4781 { 4782 PCBDDCChange_ctx change_ctx; 4783 PetscErrorCode ierr; 4784 4785 PetscFunctionBegin; 4786 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4787 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4788 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4789 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4790 PetscFunctionReturn(0); 4791 } 4792 4793 #undef __FUNCT__ 4794 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4795 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4796 { 4797 PC_IS *pcis=(PC_IS*)pc->data; 4798 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4799 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4800 Mat S_j; 4801 PetscInt *used_xadj,*used_adjncy; 4802 PetscBool free_used_adj; 4803 PetscErrorCode ierr; 4804 4805 PetscFunctionBegin; 4806 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4807 free_used_adj = PETSC_FALSE; 4808 if (pcbddc->sub_schurs_layers == -1) { 4809 used_xadj = NULL; 4810 used_adjncy = NULL; 4811 } else { 4812 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4813 used_xadj = pcbddc->mat_graph->xadj; 4814 used_adjncy = pcbddc->mat_graph->adjncy; 4815 } else if (pcbddc->computed_rowadj) { 4816 used_xadj = pcbddc->mat_graph->xadj; 4817 used_adjncy = pcbddc->mat_graph->adjncy; 4818 } else { 4819 PetscBool flg_row=PETSC_FALSE; 4820 const PetscInt *xadj,*adjncy; 4821 PetscInt nvtxs; 4822 4823 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4824 if (flg_row) { 4825 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4826 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4827 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4828 free_used_adj = PETSC_TRUE; 4829 } else { 4830 pcbddc->sub_schurs_layers = -1; 4831 used_xadj = NULL; 4832 used_adjncy = NULL; 4833 } 4834 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4835 } 4836 } 4837 4838 /* setup sub_schurs data */ 4839 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4840 if (!sub_schurs->use_mumps) { 4841 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4842 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4843 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4844 } else { 4845 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4846 PetscBool isseqaij; 4847 if (!pcbddc->use_vertices && reuse_solvers) { 4848 PetscInt n_vertices; 4849 4850 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4851 reuse_solvers = (PetscBool)!n_vertices; 4852 } 4853 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4854 if (!isseqaij) { 4855 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4856 if (matis->A == pcbddc->local_mat) { 4857 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4858 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4859 } else { 4860 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4861 } 4862 } 4863 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4864 } 4865 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4866 4867 /* free adjacency */ 4868 if (free_used_adj) { 4869 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4870 } 4871 PetscFunctionReturn(0); 4872 } 4873 4874 #undef __FUNCT__ 4875 #define __FUNCT__ "PCBDDCInitSubSchurs" 4876 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4877 { 4878 PC_IS *pcis=(PC_IS*)pc->data; 4879 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4880 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4881 PCBDDCGraph graph; 4882 PetscErrorCode ierr; 4883 4884 PetscFunctionBegin; 4885 /* attach interface graph for determining subsets */ 4886 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4887 IS verticesIS,verticescomm; 4888 PetscInt vsize,*idxs; 4889 4890 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4891 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4892 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4893 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4894 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4895 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4896 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4897 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4898 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4899 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4900 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4901 /* 4902 if (pcbddc->dbg_flag) { 4903 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4904 } 4905 */ 4906 } else { 4907 graph = pcbddc->mat_graph; 4908 } 4909 4910 /* sub_schurs init */ 4911 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4912 4913 /* free graph struct */ 4914 if (pcbddc->sub_schurs_rebuild) { 4915 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4916 } 4917 PetscFunctionReturn(0); 4918 } 4919