1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 thresh = 1.0; 92 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 93 #if defined(PETSC_USE_COMPLEX) 94 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 95 #else 96 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 97 #endif 98 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 99 ierr = PetscFPTrapPop();CHKERRQ(ierr); 100 } else { 101 /* TODO */ 102 } 103 } else { 104 lwork = 0; 105 } 106 107 nv = 0; 108 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 109 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 110 } 111 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 112 if (allocated_S_St) { 113 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 114 } 115 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 116 #if defined(PETSC_USE_COMPLEX) 117 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 118 #endif 119 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 121 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 122 nv+cum,&pcbddc->adaptive_constraints_idxs, 123 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 124 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 125 126 maxneigs = 0; 127 cum = cum2 = cumarray = 0; 128 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 129 pcbddc->adaptive_constraints_data_ptr[0] = 0; 130 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 131 const PetscInt *idxs; 132 133 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 134 for (cum=0;cum<nv;cum++) { 135 pcbddc->adaptive_constraints_n[cum] = 1; 136 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 137 pcbddc->adaptive_constraints_data[cum] = 1.0; 138 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 139 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 140 } 141 cum2 = cum; 142 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 143 } 144 145 if (mss) { /* multilevel */ 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 147 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 148 } 149 150 for (i=0;i<sub_schurs->n_subs;i++) { 151 152 const PetscInt *idxs; 153 PetscReal infty = PETSC_MAX_REAL; 154 PetscInt j,subset_size,eigs_start = 0; 155 PetscBLASInt B_N; 156 PetscBool same_data = PETSC_FALSE; 157 158 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 159 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 160 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 161 if (sub_schurs->is_hermitian) { 162 PetscInt j,k; 163 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 164 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 for (j=0;j<subset_size;j++) { 168 for (k=j;k<subset_size;k++) { 169 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 170 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 171 } 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 183 /* see if we can save some work */ 184 if (sub_schurs->n_subs == 1) { 185 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 186 } 187 188 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 189 B_neigs = 0; 190 } else { 191 /* Threshold: this is an heuristic for edges */ 192 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 193 194 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 195 PetscBLASInt B_itype = 1; 196 PetscBLASInt B_IL, B_IU; 197 PetscReal eps = -1.0; /* dlamch? */ 198 PetscInt nmin_s; 199 200 /* ask for eigenvalues larger than thresh */ 201 if (pcbddc->dbg_flag) { 202 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 203 } 204 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 205 #if defined(PETSC_USE_COMPLEX) 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 207 #else 208 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 209 #endif 210 ierr = PetscFPTrapPop();CHKERRQ(ierr); 211 if (B_ierr) { 212 if (B_ierr < 0 ) { 213 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 214 } else if (B_ierr <= B_N) { 215 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 216 } else { 217 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 218 } 219 } 220 221 if (B_neigs > nmax) { 222 if (pcbddc->dbg_flag) { 223 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 224 } 225 eigs_start = B_neigs -nmax; 226 B_neigs = nmax; 227 } 228 229 nmin_s = PetscMin(nmin,B_N); 230 if (B_neigs < nmin_s) { 231 PetscBLASInt B_neigs2; 232 233 B_IU = B_N - B_neigs; 234 B_IL = B_N - nmin_s + 1; 235 if (pcbddc->dbg_flag) { 236 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 237 } 238 if (sub_schurs->is_hermitian) { 239 PetscInt j; 240 for (j=0;j<subset_size;j++) { 241 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 242 } 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 } else { 247 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 248 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 249 } 250 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 251 #if defined(PETSC_USE_COMPLEX) 252 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 253 #else 254 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 255 #endif 256 ierr = PetscFPTrapPop();CHKERRQ(ierr); 257 B_neigs += B_neigs2; 258 } 259 if (B_ierr) { 260 if (B_ierr < 0 ) { 261 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 262 } else if (B_ierr <= B_N) { 263 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 264 } else { 265 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 266 } 267 } 268 if (pcbddc->dbg_flag) { 269 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 270 for (j=0;j<B_neigs;j++) { 271 if (eigs[j] == 0.0) { 272 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 273 } else { 274 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 275 } 276 } 277 } 278 } else { 279 /* TODO */ 280 } 281 } 282 maxneigs = PetscMax(B_neigs,maxneigs); 283 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 284 if (B_neigs) { 285 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 286 287 if (pcbddc->dbg_flag > 1) { 288 PetscInt ii; 289 for (ii=0;ii<B_neigs;ii++) { 290 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 291 for (j=0;j<B_N;j++) { 292 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 293 } 294 } 295 } 296 #if 0 297 for (j=0;j<B_neigs;j++) { 298 PetscBLASInt Blas_N,Blas_one = 1.0; 299 PetscScalar norm; 300 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 301 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 302 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 303 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 304 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 305 } else { 306 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 307 } 308 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 309 } 310 #endif 311 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 312 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 313 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 314 cum++; 315 } 316 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 317 /* shift for next computation */ 318 cumarray += subset_size*subset_size; 319 } 320 if (pcbddc->dbg_flag) { 321 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 322 } 323 324 if (mss) { 325 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 326 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 327 /* destroy matrices (junk) */ 328 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 329 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 330 } 331 if (allocated_S_St) { 332 ierr = PetscFree2(S,St);CHKERRQ(ierr); 333 } 334 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 335 #if defined(PETSC_USE_COMPLEX) 336 ierr = PetscFree(rwork);CHKERRQ(ierr); 337 #endif 338 if (pcbddc->dbg_flag) { 339 PetscInt maxneigs_r; 340 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 341 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 342 } 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCSetUpSolvers" 348 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 349 { 350 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 351 PetscScalar *coarse_submat_vals; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 /* Setup local scatters R_to_B and (optionally) R_to_D */ 356 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 357 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 358 359 /* Setup local neumann solver ksp_R */ 360 /* PCBDDCSetUpLocalScatters should be called first! */ 361 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 362 363 /* Change global null space passed in by the user if change of basis has been requested */ 364 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 365 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 366 } 367 368 /* 369 Setup local correction and local part of coarse basis. 370 Gives back the dense local part of the coarse matrix in column major ordering 371 */ 372 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 373 374 /* Compute total number of coarse nodes and setup coarse solver */ 375 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 376 377 /* free */ 378 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "PCBDDCResetCustomization" 384 PetscErrorCode PCBDDCResetCustomization(PC pc) 385 { 386 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 387 PetscErrorCode ierr; 388 389 PetscFunctionBegin; 390 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 392 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 393 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 394 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 395 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 396 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 397 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 398 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 399 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 400 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "PCBDDCResetTopography" 406 PetscErrorCode PCBDDCResetTopography(PC pc) 407 { 408 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 409 PetscErrorCode ierr; 410 411 PetscFunctionBegin; 412 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 413 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 414 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 415 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 416 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "PCBDDCResetSolvers" 422 PetscErrorCode PCBDDCResetSolvers(PC pc) 423 { 424 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 425 PetscScalar *array; 426 PetscErrorCode ierr; 427 428 PetscFunctionBegin; 429 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 430 if (pcbddc->coarse_phi_B) { 431 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 432 ierr = PetscFree(array);CHKERRQ(ierr); 433 } 434 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 435 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 436 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 437 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 438 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 440 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 442 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 443 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 444 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 445 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 446 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 447 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 448 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 449 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 450 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 451 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 452 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 453 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 454 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 455 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 456 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 462 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 463 { 464 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 465 PC_IS *pcis = (PC_IS*)pc->data; 466 VecType impVecType; 467 PetscInt n_constraints,n_R,old_size; 468 PetscErrorCode ierr; 469 470 PetscFunctionBegin; 471 if (!pcbddc->ConstraintMatrix) { 472 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 473 } 474 /* get sizes */ 475 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 476 n_R = pcis->n-pcbddc->n_vertices; 477 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 478 /* local work vectors (try to avoid unneeded work)*/ 479 /* R nodes */ 480 old_size = -1; 481 if (pcbddc->vec1_R) { 482 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 483 } 484 if (n_R != old_size) { 485 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 486 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 487 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 488 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 489 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 490 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 491 } 492 /* local primal dofs */ 493 old_size = -1; 494 if (pcbddc->vec1_P) { 495 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 496 } 497 if (pcbddc->local_primal_size != old_size) { 498 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 499 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 500 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 501 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 502 } 503 /* local explicit constraints */ 504 old_size = -1; 505 if (pcbddc->vec1_C) { 506 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 507 } 508 if (n_constraints && n_constraints != old_size) { 509 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 510 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 511 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 512 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 513 } 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "PCBDDCSetUpCorrection" 519 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 520 { 521 PetscErrorCode ierr; 522 /* pointers to pcis and pcbddc */ 523 PC_IS* pcis = (PC_IS*)pc->data; 524 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 525 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 526 /* submatrices of local problem */ 527 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 528 /* submatrices of local coarse problem */ 529 Mat S_VV,S_CV,S_VC,S_CC; 530 /* working matrices */ 531 Mat C_CR; 532 /* additional working stuff */ 533 PC pc_R; 534 Mat F; 535 PetscBool isLU,isCHOL,isILU; 536 537 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 538 PetscScalar *work; 539 PetscInt *idx_V_B; 540 PetscInt n,n_vertices,n_constraints; 541 PetscInt i,n_R,n_D,n_B; 542 PetscBool unsymmetric_check; 543 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 544 MatType impMatType; 545 /* some shortcuts to scalars */ 546 PetscScalar one=1.0,m_one=-1.0; 547 548 PetscFunctionBegin; 549 n_vertices = pcbddc->n_vertices; 550 n_constraints = pcbddc->local_primal_size-n_vertices; 551 /* Set Non-overlapping dimensions */ 552 n_B = pcis->n_B; 553 n_D = pcis->n - n_B; 554 n_R = pcis->n - n_vertices; 555 556 /* Set types for local objects needed by BDDC precondtioner */ 557 impMatType = MATSEQDENSE; 558 559 /* vertices in boundary numbering */ 560 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 561 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 562 if (i != n_vertices) { 563 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 564 } 565 566 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 567 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 568 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 569 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 571 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 572 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 573 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 575 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 576 577 unsymmetric_check = PETSC_FALSE; 578 /* allocate workspace */ 579 n = 0; 580 if (n_constraints) { 581 n += n_R*n_constraints; 582 } 583 if (n_vertices) { 584 n = PetscMax(2*n_R*n_vertices,n); 585 n = PetscMax((n_R+n_B)*n_vertices,n); 586 } 587 if (!pcbddc->symmetric_primal) { 588 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 589 unsymmetric_check = PETSC_TRUE; 590 } 591 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 592 593 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 594 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 595 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 596 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 597 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 598 if (isLU || isILU || isCHOL) { 599 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 600 } else if (sub_schurs->reuse_mumps) { 601 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 602 MatFactorType type; 603 604 F = reuse_mumps->F; 605 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 606 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 607 } else { 608 F = NULL; 609 } 610 611 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 612 if (n_constraints) { 613 Mat M1,M2,M3; 614 Mat auxmat; 615 IS is_aux; 616 PetscScalar *array,*array2; 617 618 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 619 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 620 621 /* Extract constraints on R nodes: C_{CR} */ 622 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 623 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 624 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 625 626 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 627 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 628 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 629 for (i=0;i<n_constraints;i++) { 630 const PetscScalar *row_cmat_values; 631 const PetscInt *row_cmat_indices; 632 PetscInt size_of_constraint,j; 633 634 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 635 for (j=0;j<size_of_constraint;j++) { 636 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 637 } 638 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 639 } 640 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 641 if (F) { 642 Mat B; 643 644 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 645 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 646 ierr = MatDestroy(&B);CHKERRQ(ierr); 647 } else { 648 PetscScalar *marr; 649 650 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 651 for (i=0;i<n_constraints;i++) { 652 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 653 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 654 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 655 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 656 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 657 } 658 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 659 } 660 if (!pcbddc->switch_static) { 661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 662 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 663 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 664 for (i=0;i<n_constraints;i++) { 665 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 666 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 667 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 669 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 670 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 671 } 672 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 673 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 674 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 675 } else { 676 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 677 pcbddc->local_auxmat2 = local_auxmat2_R; 678 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 679 } 680 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 681 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 682 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 683 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 684 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 685 if (isCHOL) { 686 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 687 } else { 688 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 689 } 690 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 691 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 692 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 693 ierr = MatDestroy(&M2);CHKERRQ(ierr); 694 ierr = MatDestroy(&M3);CHKERRQ(ierr); 695 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 696 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 697 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 698 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 699 ierr = MatDestroy(&M1);CHKERRQ(ierr); 700 } 701 /* Get submatrices from subdomain matrix */ 702 if (n_vertices) { 703 Mat newmat; 704 IS is_aux; 705 PetscInt ibs,mbs; 706 PetscBool issbaij; 707 708 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 709 IS tis; 710 711 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 712 ierr = ISSort(tis);CHKERRQ(ierr); 713 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 714 ierr = ISDestroy(&tis);CHKERRQ(ierr); 715 } else { 716 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 717 } 718 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 719 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 720 if (ibs != mbs) { /* need to convert to SEQAIJ */ 721 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 722 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 723 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 724 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 725 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 726 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 727 } else { 728 /* this is safe */ 729 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 730 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 731 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 732 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 733 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 734 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 735 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 736 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 737 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 738 } else { 739 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 740 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 741 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 742 } 743 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 744 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 745 } 746 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 747 } 748 749 /* Matrix of coarse basis functions (local) */ 750 if (pcbddc->coarse_phi_B) { 751 PetscInt on_B,on_primal,on_D=n_D; 752 if (pcbddc->coarse_phi_D) { 753 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 754 } 755 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 756 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 757 PetscScalar *marray; 758 759 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 760 ierr = PetscFree(marray);CHKERRQ(ierr); 761 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 762 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 763 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 764 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 765 } 766 } 767 768 if (!pcbddc->coarse_phi_B) { 769 PetscScalar *marray; 770 771 n = n_B*pcbddc->local_primal_size; 772 if (pcbddc->switch_static || pcbddc->dbg_flag) { 773 n += n_D*pcbddc->local_primal_size; 774 } 775 if (!pcbddc->symmetric_primal) { 776 n *= 2; 777 } 778 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 780 n = n_B*pcbddc->local_primal_size; 781 if (pcbddc->switch_static || pcbddc->dbg_flag) { 782 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 783 n += n_D*pcbddc->local_primal_size; 784 } 785 if (!pcbddc->symmetric_primal) { 786 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 787 if (pcbddc->switch_static || pcbddc->dbg_flag) { 788 n = n_B*pcbddc->local_primal_size; 789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 793 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 794 if (pcbddc->switch_static || pcbddc->dbg_flag) { 795 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 796 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 797 } 798 } 799 } 800 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 801 /* vertices */ 802 if (n_vertices) { 803 804 if (n_R) { 805 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 806 PetscBLASInt B_N,B_one = 1; 807 PetscScalar *x,*y; 808 809 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 810 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 811 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 812 if (F) { /* TODO could be optimized for symmetric problems */ 813 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 814 } else { 815 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 816 for (i=0;i<n_vertices;i++) { 817 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 818 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 819 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 820 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 821 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 822 } 823 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 824 } 825 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 826 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 827 if (n_constraints) { 828 Mat B; 829 830 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 831 for (i=0;i<n_vertices;i++) { 832 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 837 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 838 } 839 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 840 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 841 ierr = MatDestroy(&B);CHKERRQ(ierr); 842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 843 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 844 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 845 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 846 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 847 ierr = MatDestroy(&B);CHKERRQ(ierr); 848 } 849 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 850 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 851 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 852 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 853 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 854 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 855 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 856 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 857 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 858 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 859 } else { 860 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 861 } 862 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 863 /* coarse basis functions */ 864 for (i=0;i<n_vertices;i++) { 865 PetscScalar *y; 866 867 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 868 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 869 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 870 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 y[n_B*i+idx_V_B[i]] = 1.0; 873 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 874 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 875 876 if (pcbddc->switch_static || pcbddc->dbg_flag) { 877 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 878 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 879 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 882 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 883 } 884 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 885 } 886 } 887 888 if (n_constraints) { 889 Mat B; 890 891 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 892 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 893 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 894 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 895 if (n_vertices) { 896 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 897 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 898 } else { 899 Mat S_VCt; 900 901 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 902 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 903 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 904 } 905 } 906 ierr = MatDestroy(&B);CHKERRQ(ierr); 907 /* coarse basis functions */ 908 for (i=0;i<n_constraints;i++) { 909 PetscScalar *y; 910 911 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 912 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 917 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 918 if (pcbddc->switch_static || pcbddc->dbg_flag) { 919 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 920 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 921 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 924 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 925 } 926 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 927 } 928 } 929 if (n_constraints) { 930 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 931 } 932 933 /* compute other basis functions for non-symmetric problems */ 934 if (!pcbddc->symmetric_primal) { 935 936 if (n_constraints) { 937 Mat S_CCT,B_C; 938 939 /* this is a lazy thing */ 940 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 942 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 943 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 944 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 945 if (n_vertices) { 946 Mat B_V,S_VCT; 947 948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 949 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 950 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 951 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 952 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 953 } 954 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 955 } else { /* if there are no constraints, reset work */ 956 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 957 } 958 if (n_vertices && n_R) { 959 Mat A_VRT; 960 PetscScalar *marray; 961 PetscBLASInt B_N,B_one = 1; 962 963 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 964 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 965 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 966 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 967 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 968 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 969 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 970 } 971 972 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 973 for (i=0;i<pcbddc->local_primal_size;i++) { 974 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 975 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 976 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 977 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 978 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 979 } 980 } else { 981 for (i=0;i<pcbddc->local_primal_size;i++) { 982 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 983 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 984 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 985 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 986 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 987 } 988 } 989 /* coarse basis functions */ 990 for (i=0;i<pcbddc->local_primal_size;i++) { 991 PetscScalar *y; 992 993 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 994 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 995 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 996 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 if (i<n_vertices) { 999 y[n_B*i+idx_V_B[i]] = 1.0; 1000 } 1001 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1002 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1003 1004 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1005 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1006 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1007 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1010 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1011 } 1012 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1013 } 1014 } 1015 /* free memory */ 1016 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1017 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1018 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1019 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1020 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1021 ierr = PetscFree(work);CHKERRQ(ierr); 1022 if (n_vertices) { 1023 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1024 } 1025 if (n_constraints) { 1026 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1027 } 1028 /* Checking coarse_sub_mat and coarse basis functios */ 1029 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1030 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1031 if (pcbddc->dbg_flag) { 1032 Mat coarse_sub_mat; 1033 Mat AUXMAT,TM1,TM2,TM3,TM4; 1034 Mat coarse_phi_D,coarse_phi_B; 1035 Mat coarse_psi_D,coarse_psi_B; 1036 Mat A_II,A_BB,A_IB,A_BI; 1037 Mat C_B,CPHI; 1038 IS is_dummy; 1039 Vec mones; 1040 MatType checkmattype=MATSEQAIJ; 1041 PetscReal real_value; 1042 1043 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1044 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1045 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1046 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1047 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1048 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1049 if (unsymmetric_check) { 1050 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1051 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1052 } 1053 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1054 1055 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1056 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1057 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1058 if (unsymmetric_check) { 1059 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1060 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1061 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1063 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1064 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1066 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1067 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1069 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1070 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1071 } else { 1072 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1073 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1074 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1075 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1076 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1077 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1078 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1079 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1080 } 1081 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1082 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1083 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1084 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1085 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1086 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1087 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1089 1090 /* check constraints */ 1091 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1092 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1093 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1094 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1095 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1096 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1097 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1098 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1099 if (unsymmetric_check) { 1100 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1101 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1102 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1103 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1105 } 1106 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1107 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1108 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1109 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1110 1111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1112 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1113 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1114 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1115 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1116 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1117 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1118 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1119 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1120 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1121 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1122 if (unsymmetric_check) { 1123 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1124 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1125 } 1126 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1127 } 1128 /* get back data */ 1129 *coarse_submat_vals_n = coarse_submat_vals; 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1135 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1136 { 1137 Mat *work_mat; 1138 IS isrow_s,iscol_s; 1139 PetscBool rsorted,csorted; 1140 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1141 PetscErrorCode ierr; 1142 1143 PetscFunctionBegin; 1144 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1145 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1146 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1147 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1148 1149 if (!rsorted) { 1150 const PetscInt *idxs; 1151 PetscInt *idxs_sorted,i; 1152 1153 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1154 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1155 for (i=0;i<rsize;i++) { 1156 idxs_perm_r[i] = i; 1157 } 1158 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1159 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1160 for (i=0;i<rsize;i++) { 1161 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1162 } 1163 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1164 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1165 } else { 1166 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1167 isrow_s = isrow; 1168 } 1169 1170 if (!csorted) { 1171 if (isrow == iscol) { 1172 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1173 iscol_s = isrow_s; 1174 } else { 1175 const PetscInt *idxs; 1176 PetscInt *idxs_sorted,i; 1177 1178 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1179 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1180 for (i=0;i<csize;i++) { 1181 idxs_perm_c[i] = i; 1182 } 1183 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1184 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1185 for (i=0;i<csize;i++) { 1186 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1187 } 1188 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1189 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1190 } 1191 } else { 1192 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1193 iscol_s = iscol; 1194 } 1195 1196 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1197 1198 if (!rsorted || !csorted) { 1199 Mat new_mat; 1200 IS is_perm_r,is_perm_c; 1201 1202 if (!rsorted) { 1203 PetscInt *idxs_r,i; 1204 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1205 for (i=0;i<rsize;i++) { 1206 idxs_r[idxs_perm_r[i]] = i; 1207 } 1208 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1209 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1210 } else { 1211 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1212 } 1213 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1214 1215 if (!csorted) { 1216 if (isrow_s == iscol_s) { 1217 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1218 is_perm_c = is_perm_r; 1219 } else { 1220 PetscInt *idxs_c,i; 1221 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1222 for (i=0;i<csize;i++) { 1223 idxs_c[idxs_perm_c[i]] = i; 1224 } 1225 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1226 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1227 } 1228 } else { 1229 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1230 } 1231 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1232 1233 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1234 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1235 work_mat[0] = new_mat; 1236 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1237 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1238 } 1239 1240 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1241 *B = work_mat[0]; 1242 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1243 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1244 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1250 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1251 { 1252 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1253 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1254 Mat new_mat; 1255 IS is_local,is_global; 1256 PetscInt local_size; 1257 PetscBool isseqaij; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1262 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1263 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1264 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1265 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1266 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1267 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1268 1269 /* check */ 1270 if (pcbddc->dbg_flag) { 1271 Vec x,x_change; 1272 PetscReal error; 1273 1274 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1275 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1276 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1277 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1279 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1280 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1281 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1282 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1283 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1286 ierr = VecDestroy(&x);CHKERRQ(ierr); 1287 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1288 } 1289 1290 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1291 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1292 if (isseqaij) { 1293 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1294 } else { 1295 Mat work_mat; 1296 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1297 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1298 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1299 } 1300 if (matis->A->symmetric_set) { 1301 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1302 #if !defined(PETSC_USE_COMPLEX) 1303 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1304 #endif 1305 } 1306 /* 1307 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1308 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1309 */ 1310 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1311 PetscFunctionReturn(0); 1312 } 1313 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1316 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1317 { 1318 PC_IS* pcis = (PC_IS*)(pc->data); 1319 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1320 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1321 PetscInt *idx_R_local=NULL; 1322 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1323 PetscInt vbs,bs; 1324 PetscBT bitmask=NULL; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 /* 1329 No need to setup local scatters if 1330 - primal space is unchanged 1331 AND 1332 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1333 AND 1334 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1335 */ 1336 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1337 PetscFunctionReturn(0); 1338 } 1339 /* destroy old objects */ 1340 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1341 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1342 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1343 /* Set Non-overlapping dimensions */ 1344 n_B = pcis->n_B; 1345 n_D = pcis->n - n_B; 1346 n_vertices = pcbddc->n_vertices; 1347 1348 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1349 1350 /* create auxiliary bitmask and allocate workspace */ 1351 if (!sub_schurs->reuse_mumps) { 1352 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1353 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1354 for (i=0;i<n_vertices;i++) { 1355 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1356 } 1357 1358 for (i=0, n_R=0; i<pcis->n; i++) { 1359 if (!PetscBTLookup(bitmask,i)) { 1360 idx_R_local[n_R++] = i; 1361 } 1362 } 1363 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1364 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1365 1366 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1367 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1368 } 1369 1370 /* Block code */ 1371 vbs = 1; 1372 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1373 if (bs>1 && !(n_vertices%bs)) { 1374 PetscBool is_blocked = PETSC_TRUE; 1375 PetscInt *vary; 1376 if (!sub_schurs->reuse_mumps) { 1377 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1378 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1379 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1380 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1381 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1382 for (i=0; i<pcis->n/bs; i++) { 1383 if (vary[i]!=0 && vary[i]!=bs) { 1384 is_blocked = PETSC_FALSE; 1385 break; 1386 } 1387 } 1388 ierr = PetscFree(vary);CHKERRQ(ierr); 1389 } else { 1390 /* Verify directly the R set */ 1391 for (i=0; i<n_R/bs; i++) { 1392 PetscInt j,node=idx_R_local[bs*i]; 1393 for (j=1; j<bs; j++) { 1394 if (node != idx_R_local[bs*i+j]-j) { 1395 is_blocked = PETSC_FALSE; 1396 break; 1397 } 1398 } 1399 } 1400 } 1401 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1402 vbs = bs; 1403 for (i=0;i<n_R/vbs;i++) { 1404 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1405 } 1406 } 1407 } 1408 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1409 if (sub_schurs->reuse_mumps) { 1410 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1411 1412 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1413 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1414 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1415 reuse_mumps->is_R = pcbddc->is_R_local; 1416 } else { 1417 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1418 } 1419 1420 /* print some info if requested */ 1421 if (pcbddc->dbg_flag) { 1422 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1423 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1424 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1425 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1426 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1427 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1428 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1429 } 1430 1431 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1432 if (!sub_schurs->reuse_mumps) { 1433 IS is_aux1,is_aux2; 1434 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1435 1436 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1437 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1438 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1439 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1440 for (i=0; i<n_D; i++) { 1441 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1442 } 1443 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1444 for (i=0, j=0; i<n_R; i++) { 1445 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1446 aux_array1[j++] = i; 1447 } 1448 } 1449 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1450 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1451 for (i=0, j=0; i<n_B; i++) { 1452 if (!PetscBTLookup(bitmask,is_indices[i])) { 1453 aux_array2[j++] = i; 1454 } 1455 } 1456 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1457 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1458 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1459 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1460 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1461 1462 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1463 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1464 for (i=0, j=0; i<n_R; i++) { 1465 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1466 aux_array1[j++] = i; 1467 } 1468 } 1469 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1470 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1471 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1472 } 1473 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1474 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1475 } else { 1476 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1477 IS tis; 1478 PetscInt schur_size; 1479 1480 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1481 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1482 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1483 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1484 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1485 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1486 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1487 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1488 } 1489 } 1490 PetscFunctionReturn(0); 1491 } 1492 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1496 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1497 { 1498 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1499 PC_IS *pcis = (PC_IS*)pc->data; 1500 PC pc_temp; 1501 Mat A_RR; 1502 MatReuse reuse; 1503 PetscScalar m_one = -1.0; 1504 PetscReal value; 1505 PetscInt n_D,n_R,ibs,mbs; 1506 PetscBool use_exact,use_exact_reduced,issbaij; 1507 PetscErrorCode ierr; 1508 /* prefixes stuff */ 1509 char dir_prefix[256],neu_prefix[256],str_level[16]; 1510 size_t len; 1511 1512 PetscFunctionBegin; 1513 1514 /* compute prefixes */ 1515 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1516 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1517 if (!pcbddc->current_level) { 1518 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1519 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1520 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1521 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1522 } else { 1523 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1524 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1525 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1526 len -= 15; /* remove "pc_bddc_coarse_" */ 1527 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1528 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1529 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1530 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1531 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1532 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1533 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1534 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1535 } 1536 1537 /* DIRICHLET PROBLEM */ 1538 if (dirichlet) { 1539 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1540 if (pcbddc->local_mat->symmetric_set) { 1541 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1542 } 1543 /* Matrix for Dirichlet problem is pcis->A_II */ 1544 n_D = pcis->n - pcis->n_B; 1545 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1546 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1547 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1548 /* default */ 1549 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1550 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1551 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1552 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1553 if (issbaij) { 1554 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1555 } else { 1556 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1557 } 1558 /* Allow user's customization */ 1559 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1560 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1561 } 1562 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1563 if (sub_schurs->reuse_mumps) { 1564 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1565 1566 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1567 } 1568 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1569 if (!n_D) { 1570 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1571 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1572 } 1573 /* Set Up KSP for Dirichlet problem of BDDC */ 1574 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1575 /* set ksp_D into pcis data */ 1576 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1577 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1578 pcis->ksp_D = pcbddc->ksp_D; 1579 } 1580 1581 /* NEUMANN PROBLEM */ 1582 A_RR = 0; 1583 if (neumann) { 1584 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1585 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1586 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1587 if (pcbddc->ksp_R) { /* already created ksp */ 1588 PetscInt nn_R; 1589 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1590 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1591 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1592 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1593 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1594 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1595 reuse = MAT_INITIAL_MATRIX; 1596 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1597 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1598 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1599 reuse = MAT_INITIAL_MATRIX; 1600 } else { /* safe to reuse the matrix */ 1601 reuse = MAT_REUSE_MATRIX; 1602 } 1603 } 1604 /* last check */ 1605 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1606 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1607 reuse = MAT_INITIAL_MATRIX; 1608 } 1609 } else { /* first time, so we need to create the matrix */ 1610 reuse = MAT_INITIAL_MATRIX; 1611 } 1612 /* extract A_RR */ 1613 if (!sub_schurs->reuse_mumps) { 1614 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1615 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1616 if (ibs != mbs) { 1617 Mat newmat; 1618 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1619 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1620 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1621 } else { 1622 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1623 } 1624 if (pcbddc->local_mat->symmetric_set) { 1625 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1626 } 1627 } else { 1628 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1629 1630 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1631 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1632 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1633 } 1634 if (!pcbddc->ksp_R) { /* create object if not present */ 1635 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1636 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1637 /* default */ 1638 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1639 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1640 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1641 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1642 if (issbaij) { 1643 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1644 } else { 1645 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1646 } 1647 /* Allow user's customization */ 1648 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1649 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1650 } 1651 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1652 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1653 if (!n_R) { 1654 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1655 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1656 } 1657 /* Reuse MUMPS solver if it is present */ 1658 if (sub_schurs->reuse_mumps) { 1659 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1660 1661 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1662 } 1663 /* Set Up KSP for Neumann problem of BDDC */ 1664 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1665 } 1666 /* free Neumann problem's matrix */ 1667 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1668 1669 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1670 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1671 if (pcbddc->dbg_flag) { 1672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1673 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1674 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1675 } 1676 if (dirichlet) { /* Dirichlet */ 1677 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1678 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1679 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1680 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1681 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1682 /* need to be adapted? */ 1683 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1684 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1685 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1686 /* print info */ 1687 if (pcbddc->dbg_flag) { 1688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1689 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1690 } 1691 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1692 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1693 } 1694 } 1695 if (neumann) { /* Neumann */ 1696 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1697 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1698 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1699 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1700 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1701 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1702 /* need to be adapted? */ 1703 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1704 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1705 /* print info */ 1706 if (pcbddc->dbg_flag) { 1707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1708 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1709 } 1710 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1711 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1712 } 1713 } 1714 } 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1720 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1721 { 1722 PetscErrorCode ierr; 1723 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1724 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1725 1726 PetscFunctionBegin; 1727 if (!sub_schurs->reuse_mumps) { 1728 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1729 } 1730 if (!pcbddc->switch_static) { 1731 if (applytranspose && pcbddc->local_auxmat1) { 1732 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1733 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1734 } 1735 if (!sub_schurs->reuse_mumps) { 1736 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1737 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1738 } else { 1739 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1740 1741 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1742 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1743 } 1744 } else { 1745 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1746 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1747 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1748 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1749 if (applytranspose && pcbddc->local_auxmat1) { 1750 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1751 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1752 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1753 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1754 } 1755 } 1756 if (!sub_schurs->reuse_mumps) { 1757 if (applytranspose) { 1758 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1759 } else { 1760 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1761 } 1762 #if defined(PETSC_HAVE_MUMPS) 1763 } else { 1764 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1765 1766 if (applytranspose) { 1767 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1768 } else { 1769 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1770 } 1771 #endif 1772 } 1773 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1774 if (!pcbddc->switch_static) { 1775 if (!sub_schurs->reuse_mumps) { 1776 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1778 } else { 1779 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1780 1781 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1782 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1783 } 1784 if (!applytranspose && pcbddc->local_auxmat1) { 1785 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1786 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1787 } 1788 } else { 1789 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1790 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1791 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1792 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 if (!applytranspose && pcbddc->local_auxmat1) { 1794 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1795 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1796 } 1797 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1799 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1800 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 } 1802 PetscFunctionReturn(0); 1803 } 1804 1805 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1808 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1809 { 1810 PetscErrorCode ierr; 1811 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1812 PC_IS* pcis = (PC_IS*) (pc->data); 1813 const PetscScalar zero = 0.0; 1814 1815 PetscFunctionBegin; 1816 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1817 if (applytranspose) { 1818 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1819 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1820 } else { 1821 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1822 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1823 } 1824 /* start communications from local primal nodes to rhs of coarse solver */ 1825 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1826 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1827 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1828 1829 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1830 /* TODO remove null space when doing multilevel */ 1831 if (pcbddc->coarse_ksp) { 1832 Vec rhs,sol; 1833 1834 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1835 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1836 if (applytranspose) { 1837 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1838 } else { 1839 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1840 } 1841 } 1842 1843 /* Local solution on R nodes */ 1844 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1845 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1846 } 1847 1848 /* communications from coarse sol to local primal nodes */ 1849 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1850 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1851 1852 /* Sum contributions from two levels */ 1853 if (applytranspose) { 1854 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1855 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1856 } else { 1857 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1858 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1859 } 1860 PetscFunctionReturn(0); 1861 } 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1865 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1866 { 1867 PetscErrorCode ierr; 1868 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1869 PetscScalar *array; 1870 Vec from,to; 1871 1872 PetscFunctionBegin; 1873 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1874 from = pcbddc->coarse_vec; 1875 to = pcbddc->vec1_P; 1876 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1877 Vec tvec; 1878 1879 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1880 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1881 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1882 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1883 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1884 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1885 } 1886 } else { /* from local to global -> put data in coarse right hand side */ 1887 from = pcbddc->vec1_P; 1888 to = pcbddc->coarse_vec; 1889 } 1890 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1896 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1897 { 1898 PetscErrorCode ierr; 1899 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1900 PetscScalar *array; 1901 Vec from,to; 1902 1903 PetscFunctionBegin; 1904 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1905 from = pcbddc->coarse_vec; 1906 to = pcbddc->vec1_P; 1907 } else { /* from local to global -> put data in coarse right hand side */ 1908 from = pcbddc->vec1_P; 1909 to = pcbddc->coarse_vec; 1910 } 1911 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1912 if (smode == SCATTER_FORWARD) { 1913 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1914 Vec tvec; 1915 1916 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1917 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1918 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1919 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1920 } 1921 } else { 1922 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1923 ierr = VecResetArray(from);CHKERRQ(ierr); 1924 } 1925 } 1926 PetscFunctionReturn(0); 1927 } 1928 1929 /* uncomment for testing purposes */ 1930 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1933 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1934 { 1935 PetscErrorCode ierr; 1936 PC_IS* pcis = (PC_IS*)(pc->data); 1937 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1938 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1939 /* one and zero */ 1940 PetscScalar one=1.0,zero=0.0; 1941 /* space to store constraints and their local indices */ 1942 PetscScalar *constraints_data; 1943 PetscInt *constraints_idxs,*constraints_idxs_B; 1944 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1945 PetscInt *constraints_n; 1946 /* iterators */ 1947 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1948 /* BLAS integers */ 1949 PetscBLASInt lwork,lierr; 1950 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1951 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1952 /* reuse */ 1953 PetscInt olocal_primal_size,olocal_primal_size_cc; 1954 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1955 /* change of basis */ 1956 PetscBool qr_needed; 1957 PetscBT change_basis,qr_needed_idx; 1958 /* auxiliary stuff */ 1959 PetscInt *nnz,*is_indices; 1960 PetscInt ncc; 1961 /* some quantities */ 1962 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1963 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1964 1965 PetscFunctionBegin; 1966 /* Destroy Mat objects computed previously */ 1967 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1968 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1969 /* save info on constraints from previous setup (if any) */ 1970 olocal_primal_size = pcbddc->local_primal_size; 1971 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1972 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1973 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1974 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1975 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1976 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1977 1978 /* print some info */ 1979 if (pcbddc->dbg_flag) { 1980 IS vertices; 1981 PetscInt nv,nedges,nfaces; 1982 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1983 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1984 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1985 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1986 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1987 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1990 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1991 } 1992 1993 if (!pcbddc->adaptive_selection) { 1994 IS ISForVertices,*ISForFaces,*ISForEdges; 1995 MatNullSpace nearnullsp; 1996 const Vec *nearnullvecs; 1997 Vec *localnearnullsp; 1998 PetscScalar *array; 1999 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2000 PetscBool nnsp_has_cnst; 2001 /* LAPACK working arrays for SVD or POD */ 2002 PetscBool skip_lapack,boolforchange; 2003 PetscScalar *work; 2004 PetscReal *singular_vals; 2005 #if defined(PETSC_USE_COMPLEX) 2006 PetscReal *rwork; 2007 #endif 2008 #if defined(PETSC_MISSING_LAPACK_GESVD) 2009 PetscScalar *temp_basis,*correlation_mat; 2010 #else 2011 PetscBLASInt dummy_int=1; 2012 PetscScalar dummy_scalar=1.; 2013 #endif 2014 2015 /* Get index sets for faces, edges and vertices from graph */ 2016 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2017 /* free unneeded index sets */ 2018 if (!pcbddc->use_vertices) { 2019 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2020 } 2021 if (!pcbddc->use_edges) { 2022 for (i=0;i<n_ISForEdges;i++) { 2023 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2024 } 2025 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2026 n_ISForEdges = 0; 2027 } 2028 if (!pcbddc->use_faces) { 2029 for (i=0;i<n_ISForFaces;i++) { 2030 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2031 } 2032 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2033 n_ISForFaces = 0; 2034 } 2035 2036 #if defined(PETSC_USE_DEBUG) 2037 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2038 Also use_change_of_basis should be consistent among processors */ 2039 if (pcbddc->NullSpace) { 2040 PetscBool tbool[2],gbool[2]; 2041 2042 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2043 pcbddc->use_change_of_basis = PETSC_TRUE; 2044 if (!ISForEdges) { 2045 pcbddc->use_change_on_faces = PETSC_TRUE; 2046 } 2047 } 2048 tbool[0] = pcbddc->use_change_of_basis; 2049 tbool[1] = pcbddc->use_change_on_faces; 2050 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2051 pcbddc->use_change_of_basis = gbool[0]; 2052 pcbddc->use_change_on_faces = gbool[1]; 2053 } 2054 #endif 2055 2056 /* check if near null space is attached to global mat */ 2057 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2058 if (nearnullsp) { 2059 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2060 /* remove any stored info */ 2061 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2062 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2063 /* store information for BDDC solver reuse */ 2064 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2065 pcbddc->onearnullspace = nearnullsp; 2066 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2067 for (i=0;i<nnsp_size;i++) { 2068 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2069 } 2070 } else { /* if near null space is not provided BDDC uses constants by default */ 2071 nnsp_size = 0; 2072 nnsp_has_cnst = PETSC_TRUE; 2073 } 2074 /* get max number of constraints on a single cc */ 2075 max_constraints = nnsp_size; 2076 if (nnsp_has_cnst) max_constraints++; 2077 2078 /* 2079 Evaluate maximum storage size needed by the procedure 2080 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2081 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2082 There can be multiple constraints per connected component 2083 */ 2084 n_vertices = 0; 2085 if (ISForVertices) { 2086 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2087 } 2088 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2089 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2090 2091 total_counts = n_ISForFaces+n_ISForEdges; 2092 total_counts *= max_constraints; 2093 total_counts += n_vertices; 2094 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2095 2096 total_counts = 0; 2097 max_size_of_constraint = 0; 2098 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2099 IS used_is; 2100 if (i<n_ISForEdges) { 2101 used_is = ISForEdges[i]; 2102 } else { 2103 used_is = ISForFaces[i-n_ISForEdges]; 2104 } 2105 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2106 total_counts += j; 2107 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2108 } 2109 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2110 2111 /* get local part of global near null space vectors */ 2112 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2113 for (k=0;k<nnsp_size;k++) { 2114 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2115 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2117 } 2118 2119 /* whether or not to skip lapack calls */ 2120 skip_lapack = PETSC_TRUE; 2121 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2122 2123 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2124 if (!skip_lapack) { 2125 PetscScalar temp_work; 2126 2127 #if defined(PETSC_MISSING_LAPACK_GESVD) 2128 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2129 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2130 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2131 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2132 #if defined(PETSC_USE_COMPLEX) 2133 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2134 #endif 2135 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2136 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2137 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2138 lwork = -1; 2139 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2140 #if !defined(PETSC_USE_COMPLEX) 2141 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2142 #else 2143 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2144 #endif 2145 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2146 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2147 #else /* on missing GESVD */ 2148 /* SVD */ 2149 PetscInt max_n,min_n; 2150 max_n = max_size_of_constraint; 2151 min_n = max_constraints; 2152 if (max_size_of_constraint < max_constraints) { 2153 min_n = max_size_of_constraint; 2154 max_n = max_constraints; 2155 } 2156 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2157 #if defined(PETSC_USE_COMPLEX) 2158 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2159 #endif 2160 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2161 lwork = -1; 2162 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2163 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2164 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2165 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2166 #if !defined(PETSC_USE_COMPLEX) 2167 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2168 #else 2169 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2170 #endif 2171 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2172 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2173 #endif /* on missing GESVD */ 2174 /* Allocate optimal workspace */ 2175 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2176 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2177 } 2178 /* Now we can loop on constraining sets */ 2179 total_counts = 0; 2180 constraints_idxs_ptr[0] = 0; 2181 constraints_data_ptr[0] = 0; 2182 /* vertices */ 2183 if (n_vertices) { 2184 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2185 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2186 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2187 for (i=0;i<n_vertices;i++) { 2188 constraints_n[total_counts] = 1; 2189 constraints_data[total_counts] = 1.0; 2190 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2191 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2192 total_counts++; 2193 } 2194 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2195 PetscBool used_vertex; 2196 for (i=0;i<n_vertices;i++) { 2197 used_vertex = PETSC_FALSE; 2198 k = 0; 2199 while (!used_vertex && k<nnsp_size) { 2200 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2201 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2202 constraints_n[total_counts] = 1; 2203 constraints_idxs[total_counts] = is_indices[i]; 2204 constraints_data[total_counts] = 1.0; 2205 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2206 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2207 total_counts++; 2208 used_vertex = PETSC_TRUE; 2209 } 2210 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2211 k++; 2212 } 2213 } 2214 } 2215 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2216 n_vertices = total_counts; 2217 } 2218 2219 /* edges and faces */ 2220 total_counts_cc = total_counts; 2221 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2222 IS used_is; 2223 PetscBool idxs_copied = PETSC_FALSE; 2224 2225 if (ncc<n_ISForEdges) { 2226 used_is = ISForEdges[ncc]; 2227 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2228 } else { 2229 used_is = ISForFaces[ncc-n_ISForEdges]; 2230 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2231 } 2232 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2233 2234 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2235 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2236 /* change of basis should not be performed on local periodic nodes */ 2237 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2238 if (nnsp_has_cnst) { 2239 PetscScalar quad_value; 2240 2241 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2242 idxs_copied = PETSC_TRUE; 2243 2244 if (!pcbddc->use_nnsp_true) { 2245 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2246 } else { 2247 quad_value = 1.0; 2248 } 2249 for (j=0;j<size_of_constraint;j++) { 2250 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2251 } 2252 temp_constraints++; 2253 total_counts++; 2254 } 2255 for (k=0;k<nnsp_size;k++) { 2256 PetscReal real_value; 2257 PetscScalar *ptr_to_data; 2258 2259 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2260 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2261 for (j=0;j<size_of_constraint;j++) { 2262 ptr_to_data[j] = array[is_indices[j]]; 2263 } 2264 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2265 /* check if array is null on the connected component */ 2266 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2267 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2268 if (real_value > 0.0) { /* keep indices and values */ 2269 temp_constraints++; 2270 total_counts++; 2271 if (!idxs_copied) { 2272 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2273 idxs_copied = PETSC_TRUE; 2274 } 2275 } 2276 } 2277 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2278 valid_constraints = temp_constraints; 2279 if (!pcbddc->use_nnsp_true && temp_constraints) { 2280 if (temp_constraints == 1) { /* just normalize the constraint */ 2281 PetscScalar norm,*ptr_to_data; 2282 2283 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2284 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2285 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2286 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2287 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2288 } else { /* perform SVD */ 2289 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2290 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2291 2292 #if defined(PETSC_MISSING_LAPACK_GESVD) 2293 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2294 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2295 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2296 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2297 from that computed using LAPACKgesvd 2298 -> This is due to a different computation of eigenvectors in LAPACKheev 2299 -> The quality of the POD-computed basis will be the same */ 2300 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2301 /* Store upper triangular part of correlation matrix */ 2302 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2303 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2304 for (j=0;j<temp_constraints;j++) { 2305 for (k=0;k<j+1;k++) { 2306 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2307 } 2308 } 2309 /* compute eigenvalues and eigenvectors of correlation matrix */ 2310 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2311 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2312 #if !defined(PETSC_USE_COMPLEX) 2313 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2314 #else 2315 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2316 #endif 2317 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2318 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2319 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2320 j = 0; 2321 while (j < temp_constraints && singular_vals[j] < tol) j++; 2322 total_counts = total_counts-j; 2323 valid_constraints = temp_constraints-j; 2324 /* scale and copy POD basis into used quadrature memory */ 2325 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2326 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2327 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2331 if (j<temp_constraints) { 2332 PetscInt ii; 2333 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2334 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2335 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2336 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2337 for (k=0;k<temp_constraints-j;k++) { 2338 for (ii=0;ii<size_of_constraint;ii++) { 2339 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2340 } 2341 } 2342 } 2343 #else /* on missing GESVD */ 2344 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2345 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2346 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2347 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2348 #if !defined(PETSC_USE_COMPLEX) 2349 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2350 #else 2351 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2352 #endif 2353 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2354 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2355 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2356 k = temp_constraints; 2357 if (k > size_of_constraint) k = size_of_constraint; 2358 j = 0; 2359 while (j < k && singular_vals[k-j-1] < tol) j++; 2360 valid_constraints = k-j; 2361 total_counts = total_counts-temp_constraints+valid_constraints; 2362 #endif /* on missing GESVD */ 2363 } 2364 } 2365 /* update pointers information */ 2366 if (valid_constraints) { 2367 constraints_n[total_counts_cc] = valid_constraints; 2368 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2369 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2370 /* set change_of_basis flag */ 2371 if (boolforchange) { 2372 PetscBTSet(change_basis,total_counts_cc); 2373 } 2374 total_counts_cc++; 2375 } 2376 } 2377 /* free workspace */ 2378 if (!skip_lapack) { 2379 ierr = PetscFree(work);CHKERRQ(ierr); 2380 #if defined(PETSC_USE_COMPLEX) 2381 ierr = PetscFree(rwork);CHKERRQ(ierr); 2382 #endif 2383 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2384 #if defined(PETSC_MISSING_LAPACK_GESVD) 2385 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2386 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2387 #endif 2388 } 2389 for (k=0;k<nnsp_size;k++) { 2390 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2391 } 2392 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2393 /* free index sets of faces, edges and vertices */ 2394 for (i=0;i<n_ISForFaces;i++) { 2395 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2396 } 2397 if (n_ISForFaces) { 2398 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2399 } 2400 for (i=0;i<n_ISForEdges;i++) { 2401 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2402 } 2403 if (n_ISForEdges) { 2404 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2405 } 2406 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2407 } else { 2408 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2409 2410 total_counts = 0; 2411 n_vertices = 0; 2412 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2413 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2414 } 2415 max_constraints = 0; 2416 total_counts_cc = 0; 2417 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2418 total_counts += pcbddc->adaptive_constraints_n[i]; 2419 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2420 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2421 } 2422 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2423 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2424 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2425 constraints_data = pcbddc->adaptive_constraints_data; 2426 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2427 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2428 total_counts_cc = 0; 2429 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2430 if (pcbddc->adaptive_constraints_n[i]) { 2431 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2432 } 2433 } 2434 #if 0 2435 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2436 for (i=0;i<total_counts_cc;i++) { 2437 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2438 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2439 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2440 printf(" %d",constraints_idxs[j]); 2441 } 2442 printf("\n"); 2443 printf("number of cc: %d\n",constraints_n[i]); 2444 } 2445 for (i=0;i<n_vertices;i++) { 2446 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2447 } 2448 for (i=0;i<sub_schurs->n_subs;i++) { 2449 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2450 } 2451 #endif 2452 2453 max_size_of_constraint = 0; 2454 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2455 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2456 /* Change of basis */ 2457 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2458 if (pcbddc->use_change_of_basis) { 2459 for (i=0;i<sub_schurs->n_subs;i++) { 2460 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2461 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2462 } 2463 } 2464 } 2465 } 2466 pcbddc->local_primal_size = total_counts; 2467 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2468 2469 /* map constraints_idxs in boundary numbering */ 2470 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2471 if (i != constraints_idxs_ptr[total_counts_cc]) { 2472 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2473 } 2474 2475 /* Create constraint matrix */ 2476 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2477 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2478 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2479 2480 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2481 /* determine if a QR strategy is needed for change of basis */ 2482 qr_needed = PETSC_FALSE; 2483 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2484 total_primal_vertices=0; 2485 pcbddc->local_primal_size_cc = 0; 2486 for (i=0;i<total_counts_cc;i++) { 2487 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2488 if (size_of_constraint == 1) { 2489 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2490 pcbddc->local_primal_size_cc += 1; 2491 } else if (PetscBTLookup(change_basis,i)) { 2492 for (k=0;k<constraints_n[i];k++) { 2493 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2494 } 2495 pcbddc->local_primal_size_cc += constraints_n[i]; 2496 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2497 PetscBTSet(qr_needed_idx,i); 2498 qr_needed = PETSC_TRUE; 2499 } 2500 } else { 2501 pcbddc->local_primal_size_cc += 1; 2502 } 2503 } 2504 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2505 pcbddc->n_vertices = total_primal_vertices; 2506 /* permute indices in order to have a sorted set of vertices */ 2507 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2508 2509 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2510 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2511 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2512 2513 /* nonzero structure of constraint matrix */ 2514 /* and get reference dof for local constraints */ 2515 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2516 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2517 2518 j = total_primal_vertices; 2519 total_counts = total_primal_vertices; 2520 cum = total_primal_vertices; 2521 for (i=n_vertices;i<total_counts_cc;i++) { 2522 if (!PetscBTLookup(change_basis,i)) { 2523 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2524 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2525 cum++; 2526 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2527 for (k=0;k<constraints_n[i];k++) { 2528 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2529 nnz[j+k] = size_of_constraint; 2530 } 2531 j += constraints_n[i]; 2532 } 2533 } 2534 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2535 ierr = PetscFree(nnz);CHKERRQ(ierr); 2536 2537 /* set values in constraint matrix */ 2538 for (i=0;i<total_primal_vertices;i++) { 2539 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2540 } 2541 total_counts = total_primal_vertices; 2542 for (i=n_vertices;i<total_counts_cc;i++) { 2543 if (!PetscBTLookup(change_basis,i)) { 2544 PetscInt *cols; 2545 2546 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2547 cols = constraints_idxs+constraints_idxs_ptr[i]; 2548 for (k=0;k<constraints_n[i];k++) { 2549 PetscInt row = total_counts+k; 2550 PetscScalar *vals; 2551 2552 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2553 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2554 } 2555 total_counts += constraints_n[i]; 2556 } 2557 } 2558 /* assembling */ 2559 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2560 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2561 2562 /* 2563 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2564 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2565 */ 2566 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2567 if (pcbddc->use_change_of_basis) { 2568 /* dual and primal dofs on a single cc */ 2569 PetscInt dual_dofs,primal_dofs; 2570 /* working stuff for GEQRF */ 2571 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2572 PetscBLASInt lqr_work; 2573 /* working stuff for UNGQR */ 2574 PetscScalar *gqr_work,lgqr_work_t; 2575 PetscBLASInt lgqr_work; 2576 /* working stuff for TRTRS */ 2577 PetscScalar *trs_rhs; 2578 PetscBLASInt Blas_NRHS; 2579 /* pointers for values insertion into change of basis matrix */ 2580 PetscInt *start_rows,*start_cols; 2581 PetscScalar *start_vals; 2582 /* working stuff for values insertion */ 2583 PetscBT is_primal; 2584 PetscInt *aux_primal_numbering_B; 2585 /* matrix sizes */ 2586 PetscInt global_size,local_size; 2587 /* temporary change of basis */ 2588 Mat localChangeOfBasisMatrix; 2589 /* extra space for debugging */ 2590 PetscScalar *dbg_work; 2591 2592 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2593 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2594 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2595 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2596 /* nonzeros for local mat */ 2597 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2598 for (i=0;i<pcis->n;i++) nnz[i]=1; 2599 for (i=n_vertices;i<total_counts_cc;i++) { 2600 if (PetscBTLookup(change_basis,i)) { 2601 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2602 if (PetscBTLookup(qr_needed_idx,i)) { 2603 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2604 } else { 2605 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2606 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2607 } 2608 } 2609 } 2610 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2611 ierr = PetscFree(nnz);CHKERRQ(ierr); 2612 /* Set initial identity in the matrix */ 2613 for (i=0;i<pcis->n;i++) { 2614 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2615 } 2616 2617 if (pcbddc->dbg_flag) { 2618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2620 } 2621 2622 2623 /* Now we loop on the constraints which need a change of basis */ 2624 /* 2625 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2626 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2627 2628 Basic blocks of change of basis matrix T computed by 2629 2630 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2631 2632 | 1 0 ... 0 s_1/S | 2633 | 0 1 ... 0 s_2/S | 2634 | ... | 2635 | 0 ... 1 s_{n-1}/S | 2636 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2637 2638 with S = \sum_{i=1}^n s_i^2 2639 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2640 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2641 2642 - QR decomposition of constraints otherwise 2643 */ 2644 if (qr_needed) { 2645 /* space to store Q */ 2646 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2647 /* first we issue queries for optimal work */ 2648 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2649 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2650 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2651 lqr_work = -1; 2652 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2653 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2654 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2655 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2656 lgqr_work = -1; 2657 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2658 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2659 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2660 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2661 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2662 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2663 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2664 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2665 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2666 /* array to store scaling factors for reflectors */ 2667 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2668 /* array to store rhs and solution of triangular solver */ 2669 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2670 /* allocating workspace for check */ 2671 if (pcbddc->dbg_flag) { 2672 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2673 } 2674 } 2675 /* array to store whether a node is primal or not */ 2676 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2677 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2678 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2679 if (i != total_primal_vertices) { 2680 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2681 } 2682 for (i=0;i<total_primal_vertices;i++) { 2683 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2684 } 2685 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2686 2687 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2688 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2689 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2690 if (PetscBTLookup(change_basis,total_counts)) { 2691 /* get constraint info */ 2692 primal_dofs = constraints_n[total_counts]; 2693 dual_dofs = size_of_constraint-primal_dofs; 2694 2695 if (pcbddc->dbg_flag) { 2696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2697 } 2698 2699 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2700 2701 /* copy quadrature constraints for change of basis check */ 2702 if (pcbddc->dbg_flag) { 2703 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2704 } 2705 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2706 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2707 2708 /* compute QR decomposition of constraints */ 2709 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2711 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2712 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2713 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2714 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2715 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2716 2717 /* explictly compute R^-T */ 2718 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2719 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2720 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2721 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2724 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2725 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2726 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2727 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2728 2729 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2733 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2734 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2735 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2736 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2737 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2738 2739 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2740 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2741 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2742 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2743 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2744 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2745 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2747 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2748 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2749 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2750 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2751 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2752 2753 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2754 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2755 /* insert cols for primal dofs */ 2756 for (j=0;j<primal_dofs;j++) { 2757 start_vals = &qr_basis[j*size_of_constraint]; 2758 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2759 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2760 } 2761 /* insert cols for dual dofs */ 2762 for (j=0,k=0;j<dual_dofs;k++) { 2763 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2764 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2765 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2766 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2767 j++; 2768 } 2769 } 2770 2771 /* check change of basis */ 2772 if (pcbddc->dbg_flag) { 2773 PetscInt ii,jj; 2774 PetscBool valid_qr=PETSC_TRUE; 2775 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2776 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2777 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2778 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2780 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2781 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2782 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2783 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2784 for (jj=0;jj<size_of_constraint;jj++) { 2785 for (ii=0;ii<primal_dofs;ii++) { 2786 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2787 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2788 } 2789 } 2790 if (!valid_qr) { 2791 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2792 for (jj=0;jj<size_of_constraint;jj++) { 2793 for (ii=0;ii<primal_dofs;ii++) { 2794 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2795 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2796 } 2797 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2798 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2799 } 2800 } 2801 } 2802 } else { 2803 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2804 } 2805 } 2806 } else { /* simple transformation block */ 2807 PetscInt row,col; 2808 PetscScalar val,norm; 2809 2810 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2811 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2812 for (j=0;j<size_of_constraint;j++) { 2813 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2814 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2815 if (!PetscBTLookup(is_primal,row_B)) { 2816 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2817 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2818 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2819 } else { 2820 for (k=0;k<size_of_constraint;k++) { 2821 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2822 if (row != col) { 2823 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2824 } else { 2825 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2826 } 2827 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2828 } 2829 } 2830 } 2831 if (pcbddc->dbg_flag) { 2832 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2833 } 2834 } 2835 } else { 2836 if (pcbddc->dbg_flag) { 2837 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2838 } 2839 } 2840 } 2841 2842 /* free workspace */ 2843 if (qr_needed) { 2844 if (pcbddc->dbg_flag) { 2845 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2846 } 2847 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2848 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2849 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2850 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2851 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2852 } 2853 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2854 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2856 2857 /* assembling of global change of variable */ 2858 { 2859 Mat tmat; 2860 PetscInt bs; 2861 2862 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2863 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2864 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2865 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2866 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2867 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2868 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2869 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2870 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2871 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2872 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2873 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2874 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2875 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2876 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2877 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2878 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2879 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2880 } 2881 /* check */ 2882 if (pcbddc->dbg_flag) { 2883 PetscReal error; 2884 Vec x,x_change; 2885 2886 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2887 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2888 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2889 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2890 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2891 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2892 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2893 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2894 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2895 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2896 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2897 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2898 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2899 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2900 ierr = VecDestroy(&x);CHKERRQ(ierr); 2901 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2902 } 2903 2904 /* adapt sub_schurs computed (if any) */ 2905 if (pcbddc->use_deluxe_scaling) { 2906 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2907 if (sub_schurs->S_Ej_all) { 2908 Mat S_new,tmat; 2909 IS is_all_N; 2910 2911 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2912 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2913 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2914 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2915 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2916 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2917 sub_schurs->S_Ej_all = S_new; 2918 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2919 if (sub_schurs->sum_S_Ej_all) { 2920 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2921 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2922 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2923 sub_schurs->sum_S_Ej_all = S_new; 2924 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2925 } 2926 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2927 } 2928 } 2929 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2930 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2931 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2932 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2933 } 2934 2935 /* set up change of basis context */ 2936 if (pcbddc->ChangeOfBasisMatrix) { 2937 PCBDDCChange_ctx change_ctx; 2938 2939 if (!pcbddc->new_global_mat) { 2940 PetscInt global_size,local_size; 2941 2942 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2943 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2944 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2945 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2946 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2947 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2948 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2949 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2950 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2951 } else { 2952 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2953 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2954 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2955 } 2956 if (!pcbddc->user_ChangeOfBasisMatrix) { 2957 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2958 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2959 } else { 2960 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2961 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2962 } 2963 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2964 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2965 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2967 } 2968 2969 /* check if a new primal space has been introduced */ 2970 pcbddc->new_primal_space_local = PETSC_TRUE; 2971 if (olocal_primal_size == pcbddc->local_primal_size) { 2972 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2973 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2974 if (!pcbddc->new_primal_space_local) { 2975 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2976 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2977 } 2978 } 2979 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2980 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2981 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2982 2983 /* flush dbg viewer */ 2984 if (pcbddc->dbg_flag) { 2985 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2986 } 2987 2988 /* free workspace */ 2989 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2990 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2991 if (!pcbddc->adaptive_selection) { 2992 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2993 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2994 } else { 2995 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2996 pcbddc->adaptive_constraints_idxs_ptr, 2997 pcbddc->adaptive_constraints_data_ptr, 2998 pcbddc->adaptive_constraints_idxs, 2999 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3000 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3001 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3002 } 3003 PetscFunctionReturn(0); 3004 } 3005 3006 #undef __FUNCT__ 3007 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3008 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3009 { 3010 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3011 PC_IS *pcis = (PC_IS*)pc->data; 3012 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3013 PetscInt ierr,i,vertex_size,N; 3014 PetscViewer viewer=pcbddc->dbg_viewer; 3015 3016 PetscFunctionBegin; 3017 /* Reset previously computed graph */ 3018 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3019 /* Init local Graph struct */ 3020 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3021 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3022 3023 /* Check validity of the csr graph passed in by the user */ 3024 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3025 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3026 } 3027 3028 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3029 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3030 PetscInt *xadj,*adjncy; 3031 PetscInt nvtxs; 3032 PetscBool flg_row=PETSC_FALSE; 3033 3034 if (pcbddc->use_local_adj) { 3035 3036 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3037 if (flg_row) { 3038 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3039 pcbddc->computed_rowadj = PETSC_TRUE; 3040 } 3041 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3042 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3043 IS is_dummy; 3044 ISLocalToGlobalMapping l2gmap_dummy; 3045 PetscInt j,sum; 3046 PetscInt *cxadj,*cadjncy; 3047 const PetscInt *idxs; 3048 PCBDDCGraph graph; 3049 PetscBT is_on_boundary; 3050 3051 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3052 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3053 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3054 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3055 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3056 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3057 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3058 if (flg_row) { 3059 graph->xadj = xadj; 3060 graph->adjncy = adjncy; 3061 } 3062 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3063 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3064 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3065 3066 if (pcbddc->dbg_flag) { 3067 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3068 for (i=0;i<graph->ncc;i++) { 3069 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3070 } 3071 } 3072 3073 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3074 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3075 for (i=0;i<pcis->n_B;i++) { 3076 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3077 } 3078 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3079 3080 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3081 sum = 0; 3082 for (i=0;i<graph->ncc;i++) { 3083 PetscInt sizecc = 0; 3084 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3085 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3086 sizecc++; 3087 } 3088 } 3089 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3090 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3091 cxadj[graph->queue[j]] = sizecc; 3092 } 3093 } 3094 sum += sizecc*sizecc; 3095 } 3096 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3097 sum = 0; 3098 for (i=0;i<pcis->n;i++) { 3099 PetscInt temp = cxadj[i]; 3100 cxadj[i] = sum; 3101 sum += temp; 3102 } 3103 cxadj[pcis->n] = sum; 3104 for (i=0;i<graph->ncc;i++) { 3105 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3106 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3107 PetscInt k,sizecc = 0; 3108 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3109 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3110 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3111 sizecc++; 3112 } 3113 } 3114 } 3115 } 3116 } 3117 if (sum) { 3118 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3119 } else { 3120 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3121 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3122 } 3123 graph->xadj = 0; 3124 graph->adjncy = 0; 3125 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3126 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3127 } 3128 } 3129 if (pcbddc->dbg_flag) { 3130 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3131 } 3132 3133 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3134 vertex_size = 1; 3135 if (pcbddc->user_provided_isfordofs) { 3136 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3137 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3138 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3139 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3140 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3141 } 3142 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3143 pcbddc->n_ISForDofs = 0; 3144 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3145 } 3146 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3147 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3148 } else { 3149 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3150 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3151 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3152 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3153 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3154 } 3155 } 3156 } 3157 3158 /* Setup of Graph */ 3159 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3160 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3161 } 3162 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3163 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3164 } 3165 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3166 3167 /* Graph's connected components analysis */ 3168 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3169 3170 /* print some info to stdout */ 3171 if (pcbddc->dbg_flag) { 3172 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3173 } 3174 3175 /* mark topography has done */ 3176 pcbddc->recompute_topography = PETSC_FALSE; 3177 PetscFunctionReturn(0); 3178 } 3179 3180 #undef __FUNCT__ 3181 #define __FUNCT__ "PCBDDCSubsetNumbering" 3182 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3183 { 3184 Vec local_vec,global_vec; 3185 IS seqis,paris; 3186 VecScatter scatter_ctx; 3187 PetscScalar *array; 3188 PetscInt *temp_global_dofs; 3189 PetscScalar globalsum; 3190 PetscInt i,j,s; 3191 PetscInt nlocals,first_index,old_index,max_local,max_global; 3192 PetscMPIInt rank_prec_comm,size_prec_comm; 3193 PetscInt *dof_sizes,*dof_displs; 3194 PetscBool first_found; 3195 PetscErrorCode ierr; 3196 3197 PetscFunctionBegin; 3198 /* mpi buffers */ 3199 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3200 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3201 j = ( !rank_prec_comm ? size_prec_comm : 0); 3202 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3203 /* get maximum size of subset */ 3204 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3205 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3206 max_local = 0; 3207 for (i=0;i<n_local_dofs;i++) { 3208 if (max_local < temp_global_dofs[i] ) { 3209 max_local = temp_global_dofs[i]; 3210 } 3211 } 3212 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3213 max_global++; 3214 max_local = 0; 3215 for (i=0;i<n_local_dofs;i++) { 3216 if (max_local < local_dofs[i] ) { 3217 max_local = local_dofs[i]; 3218 } 3219 } 3220 max_local++; 3221 /* allocate workspace */ 3222 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3223 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3224 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3225 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3226 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3227 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3228 /* create scatter */ 3229 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3230 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3231 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3232 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3233 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3234 /* init array */ 3235 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3236 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3237 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3238 if (local_dofs_mult) { 3239 for (i=0;i<n_local_dofs;i++) { 3240 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3241 } 3242 } else { 3243 for (i=0;i<n_local_dofs;i++) { 3244 array[local_dofs[i]]=1.0; 3245 } 3246 } 3247 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3248 /* scatter into global vec and get total number of global dofs */ 3249 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3250 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3251 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3252 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3253 /* Fill global_vec with cumulative function for global numbering */ 3254 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3255 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3256 nlocals = 0; 3257 first_index = -1; 3258 first_found = PETSC_FALSE; 3259 for (i=0;i<s;i++) { 3260 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3261 first_found = PETSC_TRUE; 3262 first_index = i; 3263 } 3264 nlocals += (PetscInt)PetscRealPart(array[i]); 3265 } 3266 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3267 if (!rank_prec_comm) { 3268 dof_displs[0]=0; 3269 for (i=1;i<size_prec_comm;i++) { 3270 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3271 } 3272 } 3273 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3274 if (first_found) { 3275 array[first_index] += (PetscScalar)nlocals; 3276 old_index = first_index; 3277 for (i=first_index+1;i<s;i++) { 3278 if (PetscRealPart(array[i]) > 0.1) { 3279 array[i] += array[old_index]; 3280 old_index = i; 3281 } 3282 } 3283 } 3284 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3285 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3286 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3287 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3288 /* get global ordering of local dofs */ 3289 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3290 if (local_dofs_mult) { 3291 for (i=0;i<n_local_dofs;i++) { 3292 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3293 } 3294 } else { 3295 for (i=0;i<n_local_dofs;i++) { 3296 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3297 } 3298 } 3299 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3300 /* free workspace */ 3301 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3302 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3303 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3304 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3305 /* return pointer to global ordering of local dofs */ 3306 *global_numbering_subset = temp_global_dofs; 3307 PetscFunctionReturn(0); 3308 } 3309 3310 #undef __FUNCT__ 3311 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3312 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3313 { 3314 PetscInt i,j; 3315 PetscScalar *alphas; 3316 PetscErrorCode ierr; 3317 3318 PetscFunctionBegin; 3319 /* this implements stabilized Gram-Schmidt */ 3320 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3321 for (i=0;i<n;i++) { 3322 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3323 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3324 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3325 } 3326 ierr = PetscFree(alphas);CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 3330 #undef __FUNCT__ 3331 #define __FUNCT__ "MatISGetSubassemblingPattern" 3332 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3333 { 3334 Mat subdomain_adj; 3335 IS new_ranks,ranks_send_to; 3336 MatPartitioning partitioner; 3337 Mat_IS *matis; 3338 PetscInt n_neighs,*neighs,*n_shared,**shared; 3339 PetscInt prank; 3340 PetscMPIInt size,rank,color; 3341 PetscInt *xadj,*adjncy,*oldranks; 3342 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3343 PetscInt i,local_size,threshold=0; 3344 PetscErrorCode ierr; 3345 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3346 PetscSubcomm subcomm; 3347 3348 PetscFunctionBegin; 3349 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3350 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3351 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3352 3353 /* Get info on mapping */ 3354 matis = (Mat_IS*)(mat->data); 3355 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3356 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3357 3358 /* build local CSR graph of subdomains' connectivity */ 3359 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3360 xadj[0] = 0; 3361 xadj[1] = PetscMax(n_neighs-1,0); 3362 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3363 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3364 3365 if (threshold) { 3366 PetscInt xadj_count = 0; 3367 for (i=1;i<n_neighs;i++) { 3368 if (n_shared[i] > threshold) { 3369 adjncy[xadj_count] = neighs[i]; 3370 adjncy_wgt[xadj_count] = n_shared[i]; 3371 xadj_count++; 3372 } 3373 } 3374 xadj[1] = xadj_count; 3375 } else { 3376 if (xadj[1]) { 3377 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3378 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3379 } 3380 } 3381 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3382 if (use_square) { 3383 for (i=0;i<xadj[1];i++) { 3384 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3385 } 3386 } 3387 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3388 3389 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3390 3391 /* 3392 Restrict work on active processes only. 3393 */ 3394 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3395 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3396 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3397 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3398 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3399 if (color) { 3400 ierr = PetscFree(xadj);CHKERRQ(ierr); 3401 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3402 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3403 } else { 3404 PetscInt coarsening_ratio; 3405 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3406 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3407 prank = rank; 3408 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3409 /* 3410 for (i=0;i<size;i++) { 3411 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3412 } 3413 */ 3414 for (i=0;i<xadj[1];i++) { 3415 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3416 } 3417 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3418 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3419 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3420 3421 /* Partition */ 3422 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3423 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3424 if (use_vwgt) { 3425 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3426 v_wgt[0] = local_size; 3427 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3428 } 3429 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3430 coarsening_ratio = size/n_subdomains; 3431 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3432 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3433 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3434 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3435 3436 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3437 if (contiguous) { 3438 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3439 } else { 3440 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3441 } 3442 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3443 /* clean up */ 3444 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3445 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3446 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3447 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3448 } 3449 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3450 3451 /* assemble parallel IS for sends */ 3452 i = 1; 3453 if (color) i=0; 3454 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3455 3456 /* get back IS */ 3457 *is_sends = ranks_send_to; 3458 PetscFunctionReturn(0); 3459 } 3460 3461 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3462 3463 #undef __FUNCT__ 3464 #define __FUNCT__ "MatISSubassemble" 3465 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3466 { 3467 Mat local_mat; 3468 Mat_IS *matis; 3469 IS is_sends_internal; 3470 PetscInt rows,cols,new_local_rows; 3471 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3472 PetscBool ismatis,isdense,newisdense,destroy_mat; 3473 ISLocalToGlobalMapping l2gmap; 3474 PetscInt* l2gmap_indices; 3475 const PetscInt* is_indices; 3476 MatType new_local_type; 3477 /* buffers */ 3478 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3479 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3480 PetscInt *recv_buffer_idxs_local; 3481 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3482 /* MPI */ 3483 MPI_Comm comm,comm_n; 3484 PetscSubcomm subcomm; 3485 PetscMPIInt n_sends,n_recvs,commsize; 3486 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3487 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3488 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3489 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3490 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3491 PetscErrorCode ierr; 3492 3493 PetscFunctionBegin; 3494 /* TODO: add missing checks */ 3495 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3496 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3497 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3498 PetscValidLogicalCollectiveInt(mat,nis,7); 3499 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3500 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3501 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3502 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3503 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3504 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3505 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3506 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3507 PetscInt mrows,mcols,mnrows,mncols; 3508 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3509 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3510 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3511 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3512 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3513 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3514 } 3515 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3516 PetscValidLogicalCollectiveInt(mat,bs,0); 3517 /* prepare IS for sending if not provided */ 3518 if (!is_sends) { 3519 PetscBool pcontig = PETSC_TRUE; 3520 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3521 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3522 } else { 3523 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3524 is_sends_internal = is_sends; 3525 } 3526 3527 /* get pointer of MATIS data */ 3528 matis = (Mat_IS*)mat->data; 3529 3530 /* get comm */ 3531 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3532 3533 /* compute number of sends */ 3534 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3535 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3536 3537 /* compute number of receives */ 3538 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3539 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3540 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3541 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3542 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3543 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3544 ierr = PetscFree(iflags);CHKERRQ(ierr); 3545 3546 /* restrict comm if requested */ 3547 subcomm = 0; 3548 destroy_mat = PETSC_FALSE; 3549 if (restrict_comm) { 3550 PetscMPIInt color,subcommsize; 3551 3552 color = 0; 3553 if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */ 3554 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3555 subcommsize = commsize - subcommsize; 3556 /* check if reuse has been requested */ 3557 if (reuse == MAT_REUSE_MATRIX) { 3558 if (*mat_n) { 3559 PetscMPIInt subcommsize2; 3560 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3561 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3562 comm_n = PetscObjectComm((PetscObject)*mat_n); 3563 } else { 3564 comm_n = PETSC_COMM_SELF; 3565 } 3566 } else { /* MAT_INITIAL_MATRIX */ 3567 PetscMPIInt rank; 3568 3569 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3570 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3571 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3572 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3573 comm_n = PetscSubcommChild(subcomm); 3574 } 3575 /* flag to destroy *mat_n if not significative */ 3576 if (color) destroy_mat = PETSC_TRUE; 3577 } else { 3578 comm_n = comm; 3579 } 3580 3581 /* prepare send/receive buffers */ 3582 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3583 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3584 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3585 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3586 if (nis) { 3587 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3588 } 3589 3590 /* Get data from local matrices */ 3591 if (!isdense) { 3592 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3593 /* TODO: See below some guidelines on how to prepare the local buffers */ 3594 /* 3595 send_buffer_vals should contain the raw values of the local matrix 3596 send_buffer_idxs should contain: 3597 - MatType_PRIVATE type 3598 - PetscInt size_of_l2gmap 3599 - PetscInt global_row_indices[size_of_l2gmap] 3600 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3601 */ 3602 } else { 3603 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3604 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3605 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3606 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3607 send_buffer_idxs[1] = i; 3608 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3609 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3610 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3611 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3612 for (i=0;i<n_sends;i++) { 3613 ilengths_vals[is_indices[i]] = len*len; 3614 ilengths_idxs[is_indices[i]] = len+2; 3615 } 3616 } 3617 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3618 /* additional is (if any) */ 3619 if (nis) { 3620 PetscMPIInt psum; 3621 PetscInt j; 3622 for (j=0,psum=0;j<nis;j++) { 3623 PetscInt plen; 3624 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3625 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3626 psum += len+1; /* indices + lenght */ 3627 } 3628 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3629 for (j=0,psum=0;j<nis;j++) { 3630 PetscInt plen; 3631 const PetscInt *is_array_idxs; 3632 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3633 send_buffer_idxs_is[psum] = plen; 3634 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3635 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3636 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3637 psum += plen+1; /* indices + lenght */ 3638 } 3639 for (i=0;i<n_sends;i++) { 3640 ilengths_idxs_is[is_indices[i]] = psum; 3641 } 3642 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3643 } 3644 3645 buf_size_idxs = 0; 3646 buf_size_vals = 0; 3647 buf_size_idxs_is = 0; 3648 for (i=0;i<n_recvs;i++) { 3649 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3650 buf_size_vals += (PetscInt)olengths_vals[i]; 3651 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3652 } 3653 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3654 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3655 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3656 3657 /* get new tags for clean communications */ 3658 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3659 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3660 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3661 3662 /* allocate for requests */ 3663 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3664 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3665 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3666 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3667 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3668 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3669 3670 /* communications */ 3671 ptr_idxs = recv_buffer_idxs; 3672 ptr_vals = recv_buffer_vals; 3673 ptr_idxs_is = recv_buffer_idxs_is; 3674 for (i=0;i<n_recvs;i++) { 3675 source_dest = onodes[i]; 3676 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3677 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3678 ptr_idxs += olengths_idxs[i]; 3679 ptr_vals += olengths_vals[i]; 3680 if (nis) { 3681 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3682 ptr_idxs_is += olengths_idxs_is[i]; 3683 } 3684 } 3685 for (i=0;i<n_sends;i++) { 3686 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3687 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3688 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3689 if (nis) { 3690 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3691 } 3692 } 3693 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3694 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3695 3696 /* assemble new l2g map */ 3697 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3698 ptr_idxs = recv_buffer_idxs; 3699 new_local_rows = 0; 3700 for (i=0;i<n_recvs;i++) { 3701 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3702 ptr_idxs += olengths_idxs[i]; 3703 } 3704 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3705 ptr_idxs = recv_buffer_idxs; 3706 new_local_rows = 0; 3707 for (i=0;i<n_recvs;i++) { 3708 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3709 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3710 ptr_idxs += olengths_idxs[i]; 3711 } 3712 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3713 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3714 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3715 3716 /* infer new local matrix type from received local matrices type */ 3717 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3718 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3719 if (n_recvs) { 3720 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3721 ptr_idxs = recv_buffer_idxs; 3722 for (i=0;i<n_recvs;i++) { 3723 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3724 new_local_type_private = MATAIJ_PRIVATE; 3725 break; 3726 } 3727 ptr_idxs += olengths_idxs[i]; 3728 } 3729 switch (new_local_type_private) { 3730 case MATDENSE_PRIVATE: 3731 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3732 new_local_type = MATSEQAIJ; 3733 bs = 1; 3734 } else { /* if I receive only 1 dense matrix */ 3735 new_local_type = MATSEQDENSE; 3736 bs = 1; 3737 } 3738 break; 3739 case MATAIJ_PRIVATE: 3740 new_local_type = MATSEQAIJ; 3741 bs = 1; 3742 break; 3743 case MATBAIJ_PRIVATE: 3744 new_local_type = MATSEQBAIJ; 3745 break; 3746 case MATSBAIJ_PRIVATE: 3747 new_local_type = MATSEQSBAIJ; 3748 break; 3749 default: 3750 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3751 break; 3752 } 3753 } else { /* by default, new_local_type is seqdense */ 3754 new_local_type = MATSEQDENSE; 3755 bs = 1; 3756 } 3757 3758 /* create MATIS object if needed */ 3759 if (reuse == MAT_INITIAL_MATRIX) { 3760 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3761 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3762 } else { 3763 /* it also destroys the local matrices */ 3764 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3765 } 3766 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3767 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3768 3769 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3770 3771 /* Global to local map of received indices */ 3772 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3773 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3774 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3775 3776 /* restore attributes -> type of incoming data and its size */ 3777 buf_size_idxs = 0; 3778 for (i=0;i<n_recvs;i++) { 3779 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3780 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3781 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3782 } 3783 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3784 3785 /* set preallocation */ 3786 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3787 if (!newisdense) { 3788 PetscInt *new_local_nnz=0; 3789 3790 ptr_vals = recv_buffer_vals; 3791 ptr_idxs = recv_buffer_idxs_local; 3792 if (n_recvs) { 3793 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3794 } 3795 for (i=0;i<n_recvs;i++) { 3796 PetscInt j; 3797 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3798 for (j=0;j<*(ptr_idxs+1);j++) { 3799 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3800 } 3801 } else { 3802 /* TODO */ 3803 } 3804 ptr_idxs += olengths_idxs[i]; 3805 } 3806 if (new_local_nnz) { 3807 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3808 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3809 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3810 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3811 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3812 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3813 } else { 3814 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3815 } 3816 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3817 } else { 3818 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3819 } 3820 3821 /* set values */ 3822 ptr_vals = recv_buffer_vals; 3823 ptr_idxs = recv_buffer_idxs_local; 3824 for (i=0;i<n_recvs;i++) { 3825 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3826 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3827 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3828 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3829 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3830 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3831 } else { 3832 /* TODO */ 3833 } 3834 ptr_idxs += olengths_idxs[i]; 3835 ptr_vals += olengths_vals[i]; 3836 } 3837 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3838 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3839 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3840 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3841 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3842 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3843 3844 #if 0 3845 if (!restrict_comm) { /* check */ 3846 Vec lvec,rvec; 3847 PetscReal infty_error; 3848 3849 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3850 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3851 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3852 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3853 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3854 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3855 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3856 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3857 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3858 } 3859 #endif 3860 3861 /* assemble new additional is (if any) */ 3862 if (nis) { 3863 PetscInt **temp_idxs,*count_is,j,psum; 3864 3865 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3866 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3867 ptr_idxs = recv_buffer_idxs_is; 3868 psum = 0; 3869 for (i=0;i<n_recvs;i++) { 3870 for (j=0;j<nis;j++) { 3871 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3872 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3873 psum += plen; 3874 ptr_idxs += plen+1; /* shift pointer to received data */ 3875 } 3876 } 3877 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3878 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3879 for (i=1;i<nis;i++) { 3880 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3881 } 3882 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3883 ptr_idxs = recv_buffer_idxs_is; 3884 for (i=0;i<n_recvs;i++) { 3885 for (j=0;j<nis;j++) { 3886 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3887 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3888 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3889 ptr_idxs += plen+1; /* shift pointer to received data */ 3890 } 3891 } 3892 for (i=0;i<nis;i++) { 3893 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3894 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3895 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3896 } 3897 ierr = PetscFree(count_is);CHKERRQ(ierr); 3898 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3899 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3900 } 3901 /* free workspace */ 3902 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3903 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3904 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3905 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3906 if (isdense) { 3907 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3908 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3909 } else { 3910 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3911 } 3912 if (nis) { 3913 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3914 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3915 } 3916 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3917 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3918 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3919 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3920 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3921 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3922 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3923 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3924 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3925 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3926 ierr = PetscFree(onodes);CHKERRQ(ierr); 3927 if (nis) { 3928 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3929 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3930 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3931 } 3932 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3933 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3934 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3935 for (i=0;i<nis;i++) { 3936 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3937 } 3938 } 3939 PetscFunctionReturn(0); 3940 } 3941 3942 /* temporary hack into ksp private data structure */ 3943 #include <petsc/private/kspimpl.h> 3944 3945 #undef __FUNCT__ 3946 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3947 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3948 { 3949 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3950 PC_IS *pcis = (PC_IS*)pc->data; 3951 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3952 MatNullSpace CoarseNullSpace=NULL; 3953 ISLocalToGlobalMapping coarse_islg; 3954 IS coarse_is,*isarray; 3955 PetscInt i,im_active=-1,active_procs=-1; 3956 PetscInt nis,nisdofs,nisneu; 3957 PC pc_temp; 3958 PCType coarse_pc_type; 3959 KSPType coarse_ksp_type; 3960 PetscBool multilevel_requested,multilevel_allowed; 3961 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3962 Mat t_coarse_mat_is; 3963 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3964 PetscMPIInt all_procs; 3965 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3966 PetscBool compute_vecs = PETSC_FALSE; 3967 PetscScalar *array; 3968 PetscErrorCode ierr; 3969 3970 PetscFunctionBegin; 3971 /* Assign global numbering to coarse dofs */ 3972 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3973 PetscInt ocoarse_size; 3974 compute_vecs = PETSC_TRUE; 3975 ocoarse_size = pcbddc->coarse_size; 3976 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3977 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3978 /* see if we can avoid some work */ 3979 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3980 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3981 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3982 coarse_reuse = PETSC_FALSE; 3983 } else { /* we can safely reuse already computed coarse matrix */ 3984 coarse_reuse = PETSC_TRUE; 3985 } 3986 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3987 coarse_reuse = PETSC_FALSE; 3988 } 3989 /* reset any subassembling information */ 3990 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3991 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3992 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3993 coarse_reuse = PETSC_TRUE; 3994 } 3995 3996 /* count "active" (i.e. with positive local size) and "void" processes */ 3997 im_active = !!(pcis->n); 3998 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3999 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4000 void_procs = all_procs-active_procs; 4001 csin_type_simple = PETSC_TRUE; 4002 redist = PETSC_FALSE; 4003 if (pcbddc->current_level && void_procs) { 4004 csin_ml = PETSC_TRUE; 4005 ncoarse_ml = void_procs; 4006 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4007 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4008 csin_ds = PETSC_TRUE; 4009 ncoarse_ds = pcbddc->redistribute_coarse; 4010 redist = PETSC_TRUE; 4011 } else { 4012 csin_ds = PETSC_TRUE; 4013 ncoarse_ds = active_procs; 4014 redist = PETSC_TRUE; 4015 } 4016 } else { 4017 csin_ml = PETSC_FALSE; 4018 ncoarse_ml = all_procs; 4019 if (void_procs) { 4020 csin_ds = PETSC_TRUE; 4021 ncoarse_ds = void_procs; 4022 csin_type_simple = PETSC_FALSE; 4023 } else { 4024 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4025 csin_ds = PETSC_TRUE; 4026 ncoarse_ds = pcbddc->redistribute_coarse; 4027 redist = PETSC_TRUE; 4028 } else { 4029 csin_ds = PETSC_FALSE; 4030 ncoarse_ds = all_procs; 4031 } 4032 } 4033 } 4034 4035 /* 4036 test if we can go multilevel: three conditions must be satisfied: 4037 - we have not exceeded the number of levels requested 4038 - we can actually subassemble the active processes 4039 - we can find a suitable number of MPI processes where we can place the subassembled problem 4040 */ 4041 multilevel_allowed = PETSC_FALSE; 4042 multilevel_requested = PETSC_FALSE; 4043 if (pcbddc->current_level < pcbddc->max_levels) { 4044 multilevel_requested = PETSC_TRUE; 4045 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4046 multilevel_allowed = PETSC_FALSE; 4047 } else { 4048 multilevel_allowed = PETSC_TRUE; 4049 } 4050 } 4051 /* determine number of process partecipating to coarse solver */ 4052 if (multilevel_allowed) { 4053 ncoarse = ncoarse_ml; 4054 csin = csin_ml; 4055 redist = PETSC_FALSE; 4056 } else { 4057 ncoarse = ncoarse_ds; 4058 csin = csin_ds; 4059 } 4060 4061 /* creates temporary l2gmap and IS for coarse indexes */ 4062 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4063 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4064 4065 /* creates temporary MATIS object for coarse matrix */ 4066 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4067 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4068 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4069 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4070 #if 0 4071 { 4072 PetscViewer viewer; 4073 char filename[256]; 4074 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4075 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4076 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4077 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4078 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4079 } 4080 #endif 4081 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4082 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4083 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4084 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4085 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4086 4087 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4088 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4089 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4090 const PetscInt *idxs; 4091 ISLocalToGlobalMapping tmap; 4092 4093 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4094 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4095 /* allocate space for temporary storage */ 4096 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4097 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4098 /* allocate for IS array */ 4099 nisdofs = pcbddc->n_ISForDofsLocal; 4100 nisneu = !!pcbddc->NeumannBoundariesLocal; 4101 nis = nisdofs + nisneu; 4102 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4103 /* dofs splitting */ 4104 for (i=0;i<nisdofs;i++) { 4105 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4106 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4107 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4108 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4109 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4110 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4111 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4112 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4113 } 4114 /* neumann boundaries */ 4115 if (pcbddc->NeumannBoundariesLocal) { 4116 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4117 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4118 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4119 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4120 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4121 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4122 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4123 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4124 } 4125 /* free memory */ 4126 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4127 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4128 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4129 } else { 4130 nis = 0; 4131 nisdofs = 0; 4132 nisneu = 0; 4133 isarray = NULL; 4134 } 4135 /* destroy no longer needed map */ 4136 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4137 4138 /* restrict on coarse candidates (if needed) */ 4139 coarse_mat_is = NULL; 4140 if (csin) { 4141 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4142 if (redist) { 4143 PetscMPIInt rank; 4144 PetscInt spc,n_spc_p1,dest[1],destsize; 4145 4146 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4147 spc = active_procs/ncoarse; 4148 n_spc_p1 = active_procs%ncoarse; 4149 if (im_active) { 4150 destsize = 1; 4151 if (rank > n_spc_p1*(spc+1)-1) { 4152 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4153 } else { 4154 dest[0] = rank/(spc+1); 4155 } 4156 } else { 4157 destsize = 0; 4158 } 4159 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4160 } else if (csin_type_simple) { 4161 PetscMPIInt rank; 4162 PetscInt issize,isidx; 4163 4164 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4165 if (im_active) { 4166 issize = 1; 4167 isidx = (PetscInt)rank; 4168 } else { 4169 issize = 0; 4170 isidx = -1; 4171 } 4172 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4173 } else { /* get a suitable subassembling pattern from MATIS code */ 4174 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4175 } 4176 4177 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4178 if (!redist || ncoarse <= void_procs) { 4179 PetscInt ncoarse_cand,tissize,*nisindices; 4180 PetscInt *coarse_candidates; 4181 const PetscInt* tisindices; 4182 4183 /* get coarse candidates' ranks in pc communicator */ 4184 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4185 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4186 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4187 if (!coarse_candidates[i]) { 4188 coarse_candidates[ncoarse_cand++]=i; 4189 } 4190 } 4191 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4192 4193 4194 if (pcbddc->dbg_flag) { 4195 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4196 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4197 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4198 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4199 for (i=0;i<ncoarse_cand;i++) { 4200 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4201 } 4202 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4203 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4204 } 4205 /* shift the pattern on coarse candidates */ 4206 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4207 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4208 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4209 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4210 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4211 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4212 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4213 } 4214 if (pcbddc->dbg_flag) { 4215 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4216 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4217 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4218 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4219 } 4220 } 4221 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4222 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4223 } else { 4224 if (pcbddc->dbg_flag) { 4225 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4226 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4227 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4228 } 4229 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4230 coarse_mat_is = t_coarse_mat_is; 4231 } 4232 4233 /* create local to global scatters for coarse problem */ 4234 if (compute_vecs) { 4235 PetscInt lrows; 4236 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4237 if (coarse_mat_is) { 4238 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4239 } else { 4240 lrows = 0; 4241 } 4242 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4243 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4244 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4245 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4246 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4247 } 4248 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4249 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4250 4251 /* set defaults for coarse KSP and PC */ 4252 if (multilevel_allowed) { 4253 coarse_ksp_type = KSPRICHARDSON; 4254 coarse_pc_type = PCBDDC; 4255 } else { 4256 coarse_ksp_type = KSPPREONLY; 4257 coarse_pc_type = PCREDUNDANT; 4258 } 4259 4260 /* print some info if requested */ 4261 if (pcbddc->dbg_flag) { 4262 if (!multilevel_allowed) { 4263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4264 if (multilevel_requested) { 4265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4266 } else if (pcbddc->max_levels) { 4267 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4268 } 4269 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4270 } 4271 } 4272 4273 /* create the coarse KSP object only once with defaults */ 4274 if (coarse_mat_is) { 4275 MatReuse coarse_mat_reuse; 4276 PetscViewer dbg_viewer = NULL; 4277 if (pcbddc->dbg_flag) { 4278 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4279 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4280 } 4281 if (!pcbddc->coarse_ksp) { 4282 char prefix[256],str_level[16]; 4283 size_t len; 4284 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4285 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4286 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4287 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4288 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4289 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4290 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4291 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4292 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4293 /* prefix */ 4294 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4295 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4296 if (!pcbddc->current_level) { 4297 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4298 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4299 } else { 4300 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4301 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4302 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4303 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4304 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4305 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4306 } 4307 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4308 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4309 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4310 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4311 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4312 /* allow user customization */ 4313 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4314 } 4315 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4316 if (nisdofs) { 4317 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4318 for (i=0;i<nisdofs;i++) { 4319 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4320 } 4321 } 4322 if (nisneu) { 4323 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4324 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4325 } 4326 4327 /* get some info after set from options */ 4328 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4329 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4330 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4331 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4332 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4333 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4334 isbddc = PETSC_FALSE; 4335 } 4336 if (isredundant) { 4337 KSP inner_ksp; 4338 PC inner_pc; 4339 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4340 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4341 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4342 } 4343 4344 /* assemble coarse matrix */ 4345 if (coarse_reuse) { 4346 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4347 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4348 coarse_mat_reuse = MAT_REUSE_MATRIX; 4349 } else { 4350 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4351 } 4352 if (isbddc || isnn) { 4353 if (pcbddc->coarsening_ratio > 1) { 4354 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4355 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4356 if (pcbddc->dbg_flag) { 4357 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4358 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4359 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4360 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4361 } 4362 } 4363 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4364 } else { 4365 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4366 coarse_mat = coarse_mat_is; 4367 } 4368 } else { 4369 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4370 } 4371 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4372 4373 /* propagate symmetry info of coarse matrix */ 4374 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4375 if (pc->pmat->symmetric_set) { 4376 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4377 } 4378 if (pc->pmat->hermitian_set) { 4379 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4380 } 4381 if (pc->pmat->spd_set) { 4382 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4383 } 4384 /* set operators */ 4385 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4386 if (pcbddc->dbg_flag) { 4387 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4388 } 4389 } else { /* processes non partecipating to coarse solver (if any) */ 4390 coarse_mat = 0; 4391 } 4392 ierr = PetscFree(isarray);CHKERRQ(ierr); 4393 #if 0 4394 { 4395 PetscViewer viewer; 4396 char filename[256]; 4397 sprintf(filename,"coarse_mat.m"); 4398 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4399 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4400 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4401 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4402 } 4403 #endif 4404 4405 /* Compute coarse null space (special handling by BDDC only) */ 4406 if (pcbddc->NullSpace) { 4407 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4408 } 4409 4410 if (pcbddc->coarse_ksp) { 4411 Vec crhs,csol; 4412 PetscBool ispreonly; 4413 if (CoarseNullSpace) { 4414 if (isbddc) { 4415 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4416 } else { 4417 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4418 } 4419 } 4420 /* setup coarse ksp */ 4421 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4422 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4423 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4424 /* hack */ 4425 if (!csol) { 4426 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4427 } 4428 if (!crhs) { 4429 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4430 } 4431 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4432 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4433 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4434 KSP check_ksp; 4435 KSPType check_ksp_type; 4436 PC check_pc; 4437 Vec check_vec,coarse_vec; 4438 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4439 PetscInt its; 4440 PetscBool compute_eigs; 4441 PetscReal *eigs_r,*eigs_c; 4442 PetscInt neigs; 4443 const char *prefix; 4444 4445 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4446 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4447 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4448 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4449 if (ispreonly) { 4450 check_ksp_type = KSPPREONLY; 4451 compute_eigs = PETSC_FALSE; 4452 } else { 4453 check_ksp_type = KSPGMRES; 4454 compute_eigs = PETSC_TRUE; 4455 } 4456 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4457 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4458 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4459 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4460 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4461 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4462 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4463 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4464 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4465 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4466 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4467 /* create random vec */ 4468 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4469 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4470 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4471 if (CoarseNullSpace) { 4472 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4473 } 4474 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4475 /* solve coarse problem */ 4476 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4477 if (CoarseNullSpace) { 4478 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4479 } 4480 /* set eigenvalue estimation if preonly has not been requested */ 4481 if (compute_eigs) { 4482 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4483 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4484 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4485 lambda_max = eigs_r[neigs-1]; 4486 lambda_min = eigs_r[0]; 4487 if (pcbddc->use_coarse_estimates) { 4488 if (lambda_max>lambda_min) { 4489 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4490 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4491 } 4492 } 4493 } 4494 4495 /* check coarse problem residual error */ 4496 if (pcbddc->dbg_flag) { 4497 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4498 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4499 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4500 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4501 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4502 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4503 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4504 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4505 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4506 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4507 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4508 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4509 if (compute_eigs) { 4510 PetscReal lambda_max_s,lambda_min_s; 4511 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4512 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4513 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4514 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4515 for (i=0;i<neigs;i++) { 4516 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4517 } 4518 } 4519 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4520 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4521 } 4522 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4523 if (compute_eigs) { 4524 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4525 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4526 } 4527 } 4528 } 4529 /* print additional info */ 4530 if (pcbddc->dbg_flag) { 4531 /* waits until all processes reaches this point */ 4532 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4533 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4534 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4535 } 4536 4537 /* free memory */ 4538 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4539 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4540 PetscFunctionReturn(0); 4541 } 4542 4543 #undef __FUNCT__ 4544 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4545 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4546 { 4547 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4548 PC_IS* pcis = (PC_IS*)pc->data; 4549 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4550 PetscInt i,local_size,coarse_size=0; 4551 PetscInt *local_primal_indices=NULL; 4552 PetscInt *t_local_primal_indices=NULL; 4553 PetscErrorCode ierr; 4554 4555 PetscFunctionBegin; 4556 /* Compute global number of coarse dofs */ 4557 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4558 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4559 } 4560 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4561 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4562 local_size = 0; 4563 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4564 PetscInt j; 4565 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4566 } 4567 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4568 if (local_size != pcbddc->local_primal_size) { 4569 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4570 } 4571 4572 /* check numbering */ 4573 if (pcbddc->dbg_flag) { 4574 PetscScalar coarsesum,*array; 4575 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4576 4577 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4578 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4579 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4580 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4581 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4582 for (i=0;i<pcbddc->local_primal_size;i++) { 4583 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4584 } 4585 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4586 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4587 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4588 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4589 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4590 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4591 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4592 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4593 for (i=0;i<pcis->n;i++) { 4594 if (array[i] == 1.0) { 4595 set_error = PETSC_TRUE; 4596 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4597 } 4598 } 4599 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4600 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4601 for (i=0;i<pcis->n;i++) { 4602 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4603 } 4604 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4605 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4606 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4607 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4608 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4609 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4610 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4611 PetscInt *gidxs; 4612 4613 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4614 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4615 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4616 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4618 for (i=0;i<pcbddc->local_primal_size;i++) { 4619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4620 } 4621 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4622 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4623 } 4624 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4625 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4626 } 4627 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4628 /* get back data */ 4629 *coarse_size_n = coarse_size; 4630 *local_primal_indices_n = local_primal_indices; 4631 PetscFunctionReturn(0); 4632 } 4633 4634 #undef __FUNCT__ 4635 #define __FUNCT__ "PCBDDCGlobalToLocal" 4636 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4637 { 4638 IS localis_t; 4639 PetscInt i,lsize,*idxs,n; 4640 PetscScalar *vals; 4641 PetscErrorCode ierr; 4642 4643 PetscFunctionBegin; 4644 /* get indices in local ordering exploiting local to global map */ 4645 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4646 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4647 for (i=0;i<lsize;i++) vals[i] = 1.0; 4648 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4649 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4650 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4651 if (idxs) { /* multilevel guard */ 4652 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4653 } 4654 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4655 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4656 ierr = PetscFree(vals);CHKERRQ(ierr); 4657 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4658 /* now compute set in local ordering */ 4659 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4660 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4661 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4662 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4663 for (i=0,lsize=0;i<n;i++) { 4664 if (PetscRealPart(vals[i]) > 0.5) { 4665 lsize++; 4666 } 4667 } 4668 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4669 for (i=0,lsize=0;i<n;i++) { 4670 if (PetscRealPart(vals[i]) > 0.5) { 4671 idxs[lsize++] = i; 4672 } 4673 } 4674 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4675 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4676 *localis = localis_t; 4677 PetscFunctionReturn(0); 4678 } 4679 4680 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4681 #undef __FUNCT__ 4682 #define __FUNCT__ "PCBDDCMatMult_Private" 4683 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4684 { 4685 PCBDDCChange_ctx change_ctx; 4686 PetscErrorCode ierr; 4687 4688 PetscFunctionBegin; 4689 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4690 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4691 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4692 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4693 PetscFunctionReturn(0); 4694 } 4695 4696 #undef __FUNCT__ 4697 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4698 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4699 { 4700 PCBDDCChange_ctx change_ctx; 4701 PetscErrorCode ierr; 4702 4703 PetscFunctionBegin; 4704 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4705 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4706 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4707 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4708 PetscFunctionReturn(0); 4709 } 4710 4711 #undef __FUNCT__ 4712 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4713 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4714 { 4715 PC_IS *pcis=(PC_IS*)pc->data; 4716 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4717 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4718 Mat S_j; 4719 PetscInt *used_xadj,*used_adjncy; 4720 PetscBool free_used_adj; 4721 PetscErrorCode ierr; 4722 4723 PetscFunctionBegin; 4724 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4725 free_used_adj = PETSC_FALSE; 4726 if (pcbddc->sub_schurs_layers == -1) { 4727 used_xadj = NULL; 4728 used_adjncy = NULL; 4729 } else { 4730 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4731 used_xadj = pcbddc->mat_graph->xadj; 4732 used_adjncy = pcbddc->mat_graph->adjncy; 4733 } else if (pcbddc->computed_rowadj) { 4734 used_xadj = pcbddc->mat_graph->xadj; 4735 used_adjncy = pcbddc->mat_graph->adjncy; 4736 } else { 4737 PetscBool flg_row=PETSC_FALSE; 4738 const PetscInt *xadj,*adjncy; 4739 PetscInt nvtxs; 4740 4741 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4742 if (flg_row) { 4743 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4744 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4745 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4746 free_used_adj = PETSC_TRUE; 4747 } else { 4748 pcbddc->sub_schurs_layers = -1; 4749 used_xadj = NULL; 4750 used_adjncy = NULL; 4751 } 4752 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4753 } 4754 } 4755 4756 /* setup sub_schurs data */ 4757 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4758 if (!sub_schurs->use_mumps) { 4759 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4760 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4761 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4762 } else { 4763 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4764 if (!pcbddc->use_vertices && reuse_solvers) { 4765 PetscInt n_vertices; 4766 4767 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4768 reuse_solvers = (PetscBool)!n_vertices; 4769 } 4770 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4771 } 4772 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4773 4774 /* free adjacency */ 4775 if (free_used_adj) { 4776 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4777 } 4778 PetscFunctionReturn(0); 4779 } 4780 4781 #undef __FUNCT__ 4782 #define __FUNCT__ "PCBDDCInitSubSchurs" 4783 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4784 { 4785 PC_IS *pcis=(PC_IS*)pc->data; 4786 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4787 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4788 PCBDDCGraph graph; 4789 PetscErrorCode ierr; 4790 4791 PetscFunctionBegin; 4792 /* attach interface graph for determining subsets */ 4793 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4794 IS verticesIS,verticescomm; 4795 PetscInt vsize,*idxs; 4796 4797 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4798 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4799 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4800 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4801 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4802 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4803 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4804 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4805 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4806 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4807 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4808 /* 4809 if (pcbddc->dbg_flag) { 4810 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4811 } 4812 */ 4813 } else { 4814 graph = pcbddc->mat_graph; 4815 } 4816 4817 /* sub_schurs init */ 4818 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4819 4820 /* free graph struct */ 4821 if (pcbddc->sub_schurs_rebuild) { 4822 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4823 } 4824 PetscFunctionReturn(0); 4825 } 4826