1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 #if defined(PETSC_USE_COMPLEX) 92 rwork = NULL; 93 #endif 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 155 const PetscInt *idxs; 156 PetscReal infty = PETSC_MAX_REAL; 157 PetscInt j,subset_size,eigs_start = 0; 158 PetscBLASInt B_N; 159 PetscBool same_data = PETSC_FALSE; 160 161 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 162 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 163 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 164 if (sub_schurs->is_hermitian) { 165 PetscInt j,k; 166 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 167 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 168 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 for (k=j;k<subset_size;k++) { 172 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 173 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 174 } 175 } 176 } else { 177 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 178 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 179 } 180 } else { 181 S = Sarray + cumarray; 182 St = Starray + cumarray; 183 } 184 185 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 186 /* see if we can save some work */ 187 if (sub_schurs->n_subs == 1) { 188 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 189 } 190 191 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 192 B_neigs = 0; 193 } else { 194 /* Threshold: this is an heuristic for edges */ 195 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 196 197 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 198 PetscBLASInt B_itype = 1; 199 PetscBLASInt B_IL, B_IU; 200 PetscReal eps = -1.0; /* dlamch? */ 201 PetscInt nmin_s; 202 203 /* ask for eigenvalues larger than thresh */ 204 if (pcbddc->dbg_flag) { 205 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 206 } 207 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 208 #if defined(PETSC_USE_COMPLEX) 209 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 210 #else 211 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 212 #endif 213 ierr = PetscFPTrapPop();CHKERRQ(ierr); 214 if (B_ierr) { 215 if (B_ierr < 0 ) { 216 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 217 } else if (B_ierr <= B_N) { 218 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 219 } else { 220 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 221 } 222 } 223 224 if (B_neigs > nmax) { 225 if (pcbddc->dbg_flag) { 226 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 227 } 228 eigs_start = B_neigs -nmax; 229 B_neigs = nmax; 230 } 231 232 nmin_s = PetscMin(nmin,B_N); 233 if (B_neigs < nmin_s) { 234 PetscBLASInt B_neigs2; 235 236 B_IU = B_N - B_neigs; 237 B_IL = B_N - nmin_s + 1; 238 if (pcbddc->dbg_flag) { 239 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 240 } 241 if (sub_schurs->is_hermitian) { 242 PetscInt j; 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 for (j=0;j<subset_size;j++) { 247 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 248 } 249 } else { 250 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 251 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 } 253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 254 #if defined(PETSC_USE_COMPLEX) 255 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 256 #else 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 258 #endif 259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 260 B_neigs += B_neigs2; 261 } 262 if (B_ierr) { 263 if (B_ierr < 0 ) { 264 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 265 } else if (B_ierr <= B_N) { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 267 } else { 268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 269 } 270 } 271 if (pcbddc->dbg_flag) { 272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 273 for (j=0;j<B_neigs;j++) { 274 if (eigs[j] == 0.0) { 275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 276 } else { 277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 278 } 279 } 280 } 281 } else { 282 /* TODO */ 283 } 284 } 285 maxneigs = PetscMax(B_neigs,maxneigs); 286 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 287 if (B_neigs) { 288 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 289 290 if (pcbddc->dbg_flag > 1) { 291 PetscInt ii; 292 for (ii=0;ii<B_neigs;ii++) { 293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 294 for (j=0;j<B_N;j++) { 295 #if defined(PETSC_USE_COMPLEX) 296 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 297 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 298 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 299 #else 300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 301 #endif 302 } 303 } 304 } 305 #if 0 306 for (j=0;j<B_neigs;j++) { 307 PetscBLASInt Blas_N,Blas_one = 1.0; 308 PetscScalar norm; 309 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 310 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 311 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 312 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 313 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 314 } else { 315 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 316 } 317 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 318 } 319 #endif 320 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 321 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 322 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 323 cum++; 324 } 325 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 326 /* shift for next computation */ 327 cumarray += subset_size*subset_size; 328 } 329 if (pcbddc->dbg_flag) { 330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 331 } 332 333 if (mss) { 334 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 335 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 336 /* destroy matrices (junk) */ 337 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 338 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 339 } 340 if (allocated_S_St) { 341 ierr = PetscFree2(S,St);CHKERRQ(ierr); 342 } 343 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 344 #if defined(PETSC_USE_COMPLEX) 345 ierr = PetscFree(rwork);CHKERRQ(ierr); 346 #endif 347 if (pcbddc->dbg_flag) { 348 PetscInt maxneigs_r; 349 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 350 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 351 } 352 PetscFunctionReturn(0); 353 } 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCBDDCSetUpSolvers" 357 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 358 { 359 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 360 PetscScalar *coarse_submat_vals; 361 PetscErrorCode ierr; 362 363 PetscFunctionBegin; 364 /* Setup local scatters R_to_B and (optionally) R_to_D */ 365 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 366 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 367 368 /* Setup local neumann solver ksp_R */ 369 /* PCBDDCSetUpLocalScatters should be called first! */ 370 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 371 372 /* Change global null space passed in by the user if change of basis has been requested */ 373 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 374 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 375 } 376 377 /* 378 Setup local correction and local part of coarse basis. 379 Gives back the dense local part of the coarse matrix in column major ordering 380 */ 381 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 382 383 /* Compute total number of coarse nodes and setup coarse solver */ 384 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 385 386 /* free */ 387 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "PCBDDCResetCustomization" 393 PetscErrorCode PCBDDCResetCustomization(PC pc) 394 { 395 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 401 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 402 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 403 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 405 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 406 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 407 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 408 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 409 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetTopography" 415 PetscErrorCode PCBDDCResetTopography(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 422 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 423 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 424 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 425 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCResetSolvers" 431 PetscErrorCode PCBDDCResetSolvers(PC pc) 432 { 433 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *array; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 439 if (pcbddc->coarse_phi_B) { 440 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 441 ierr = PetscFree(array);CHKERRQ(ierr); 442 } 443 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 445 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 447 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 449 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 450 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 451 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 452 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 454 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 455 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 456 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 457 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 458 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 459 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 461 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 462 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 463 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 464 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 465 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 466 ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr); 467 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 468 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 474 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 475 { 476 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 477 PC_IS *pcis = (PC_IS*)pc->data; 478 VecType impVecType; 479 PetscInt n_constraints,n_R,old_size; 480 PetscErrorCode ierr; 481 482 PetscFunctionBegin; 483 if (!pcbddc->ConstraintMatrix) { 484 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 485 } 486 /* get sizes */ 487 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 488 n_R = pcis->n-pcbddc->n_vertices; 489 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 490 /* local work vectors (try to avoid unneeded work)*/ 491 /* R nodes */ 492 old_size = -1; 493 if (pcbddc->vec1_R) { 494 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 495 } 496 if (n_R != old_size) { 497 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 498 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 499 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 500 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 501 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 502 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 503 } 504 /* local primal dofs */ 505 old_size = -1; 506 if (pcbddc->vec1_P) { 507 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 508 } 509 if (pcbddc->local_primal_size != old_size) { 510 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 513 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 514 } 515 /* local explicit constraints */ 516 old_size = -1; 517 if (pcbddc->vec1_C) { 518 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 519 } 520 if (n_constraints && n_constraints != old_size) { 521 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 522 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 523 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 524 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 525 } 526 PetscFunctionReturn(0); 527 } 528 529 #undef __FUNCT__ 530 #define __FUNCT__ "PCBDDCSetUpCorrection" 531 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 532 { 533 PetscErrorCode ierr; 534 /* pointers to pcis and pcbddc */ 535 PC_IS* pcis = (PC_IS*)pc->data; 536 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 537 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 538 /* submatrices of local problem */ 539 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 540 /* submatrices of local coarse problem */ 541 Mat S_VV,S_CV,S_VC,S_CC; 542 /* working matrices */ 543 Mat C_CR; 544 /* additional working stuff */ 545 PC pc_R; 546 Mat F; 547 PetscBool isLU,isCHOL,isILU; 548 549 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 550 PetscScalar *work; 551 PetscInt *idx_V_B; 552 PetscInt n,n_vertices,n_constraints; 553 PetscInt i,n_R,n_D,n_B; 554 PetscBool unsymmetric_check; 555 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 556 MatType impMatType; 557 /* some shortcuts to scalars */ 558 PetscScalar one=1.0,m_one=-1.0; 559 560 PetscFunctionBegin; 561 n_vertices = pcbddc->n_vertices; 562 n_constraints = pcbddc->local_primal_size-n_vertices; 563 /* Set Non-overlapping dimensions */ 564 n_B = pcis->n_B; 565 n_D = pcis->n - n_B; 566 n_R = pcis->n - n_vertices; 567 568 /* Set types for local objects needed by BDDC precondtioner */ 569 impMatType = MATSEQDENSE; 570 571 /* vertices in boundary numbering */ 572 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 573 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 574 if (i != n_vertices) { 575 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 576 } 577 578 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 579 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 581 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 582 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 583 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 584 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 585 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 586 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 587 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 588 589 unsymmetric_check = PETSC_FALSE; 590 /* allocate workspace */ 591 n = 0; 592 if (n_constraints) { 593 n += n_R*n_constraints; 594 } 595 if (n_vertices) { 596 n = PetscMax(2*n_R*n_vertices,n); 597 n = PetscMax((n_R+n_B)*n_vertices,n); 598 } 599 if (!pcbddc->symmetric_primal) { 600 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 601 unsymmetric_check = PETSC_TRUE; 602 } 603 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 604 605 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 606 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 607 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 608 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 609 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 610 if (isLU || isILU || isCHOL) { 611 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 612 } else if (sub_schurs->reuse_mumps) { 613 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 614 MatFactorType type; 615 616 F = reuse_mumps->F; 617 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 618 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 619 } else { 620 F = NULL; 621 } 622 623 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 624 if (n_constraints) { 625 Mat M1,M2,M3; 626 Mat auxmat; 627 IS is_aux; 628 PetscScalar *array,*array2; 629 630 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 631 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 632 633 /* Extract constraints on R nodes: C_{CR} */ 634 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 635 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 636 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 637 638 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 639 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 640 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 641 for (i=0;i<n_constraints;i++) { 642 const PetscScalar *row_cmat_values; 643 const PetscInt *row_cmat_indices; 644 PetscInt size_of_constraint,j; 645 646 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 647 for (j=0;j<size_of_constraint;j++) { 648 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 649 } 650 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 651 } 652 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 653 if (F) { 654 Mat B; 655 656 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 657 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 658 ierr = MatDestroy(&B);CHKERRQ(ierr); 659 } else { 660 PetscScalar *marr; 661 662 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 663 for (i=0;i<n_constraints;i++) { 664 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 665 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 666 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 667 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 668 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 669 } 670 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 671 } 672 if (!pcbddc->switch_static) { 673 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 674 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 675 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 676 for (i=0;i<n_constraints;i++) { 677 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 678 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 679 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 680 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 681 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 682 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 683 } 684 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 685 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 686 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 687 } else { 688 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 689 pcbddc->local_auxmat2 = local_auxmat2_R; 690 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 691 } 692 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 693 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 694 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 695 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 696 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 697 if (isCHOL) { 698 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 699 } else { 700 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 701 } 702 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 703 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 704 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 705 ierr = MatDestroy(&M2);CHKERRQ(ierr); 706 ierr = MatDestroy(&M3);CHKERRQ(ierr); 707 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 708 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 709 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 710 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 711 ierr = MatDestroy(&M1);CHKERRQ(ierr); 712 } 713 /* Get submatrices from subdomain matrix */ 714 if (n_vertices) { 715 IS is_aux; 716 717 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 718 IS tis; 719 720 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 721 ierr = ISSort(tis);CHKERRQ(ierr); 722 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 723 ierr = ISDestroy(&tis);CHKERRQ(ierr); 724 } else { 725 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 726 } 727 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 728 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 729 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 730 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 731 } 732 733 /* Matrix of coarse basis functions (local) */ 734 if (pcbddc->coarse_phi_B) { 735 PetscInt on_B,on_primal,on_D=n_D; 736 if (pcbddc->coarse_phi_D) { 737 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 738 } 739 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 740 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 741 PetscScalar *marray; 742 743 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 744 ierr = PetscFree(marray);CHKERRQ(ierr); 745 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 746 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 747 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 748 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 749 } 750 } 751 752 if (!pcbddc->coarse_phi_B) { 753 PetscScalar *marray; 754 755 n = n_B*pcbddc->local_primal_size; 756 if (pcbddc->switch_static || pcbddc->dbg_flag) { 757 n += n_D*pcbddc->local_primal_size; 758 } 759 if (!pcbddc->symmetric_primal) { 760 n *= 2; 761 } 762 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 764 n = n_B*pcbddc->local_primal_size; 765 if (pcbddc->switch_static || pcbddc->dbg_flag) { 766 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 767 n += n_D*pcbddc->local_primal_size; 768 } 769 if (!pcbddc->symmetric_primal) { 770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 771 if (pcbddc->switch_static || pcbddc->dbg_flag) { 772 n = n_B*pcbddc->local_primal_size; 773 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 774 } 775 } else { 776 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 777 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 778 if (pcbddc->switch_static || pcbddc->dbg_flag) { 779 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 780 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 781 } 782 } 783 } 784 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 785 /* vertices */ 786 if (n_vertices) { 787 788 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 789 790 if (n_R) { 791 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 792 PetscBLASInt B_N,B_one = 1; 793 PetscScalar *x,*y; 794 PetscBool isseqaij; 795 796 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 797 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 798 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 799 if (F) { /* TODO could be optimized for symmetric problems */ 800 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 801 } else { 802 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 803 for (i=0;i<n_vertices;i++) { 804 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 805 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 806 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 807 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 808 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 809 } 810 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 811 } 812 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 813 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 814 if (n_constraints) { 815 Mat B; 816 817 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 818 for (i=0;i<n_vertices;i++) { 819 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 820 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 821 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 822 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 823 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 824 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 825 } 826 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 827 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 828 ierr = MatDestroy(&B);CHKERRQ(ierr); 829 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 830 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 831 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 832 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 833 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 834 ierr = MatDestroy(&B);CHKERRQ(ierr); 835 } 836 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 837 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 838 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 839 } 840 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 841 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 842 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 843 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 844 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 845 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 846 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 847 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 848 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 849 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 850 } else { 851 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 852 } 853 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 854 /* coarse basis functions */ 855 for (i=0;i<n_vertices;i++) { 856 PetscScalar *y; 857 858 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 859 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 860 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 861 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 y[n_B*i+idx_V_B[i]] = 1.0; 864 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 865 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 866 867 if (pcbddc->switch_static || pcbddc->dbg_flag) { 868 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 869 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 870 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 873 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 874 } 875 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 876 } 877 /* if n_R == 0 the object is not destroyed */ 878 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 879 } 880 881 if (n_constraints) { 882 Mat B; 883 884 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 885 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 886 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 887 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 888 if (n_vertices) { 889 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 890 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 891 } else { 892 Mat S_VCt; 893 894 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 895 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 896 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 897 } 898 } 899 ierr = MatDestroy(&B);CHKERRQ(ierr); 900 /* coarse basis functions */ 901 for (i=0;i<n_constraints;i++) { 902 PetscScalar *y; 903 904 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 905 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 906 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 907 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 910 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 912 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 917 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 918 } 919 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 920 } 921 } 922 if (n_constraints) { 923 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 924 } 925 926 /* compute other basis functions for non-symmetric problems */ 927 if (!pcbddc->symmetric_primal) { 928 929 if (n_constraints) { 930 Mat S_CCT,B_C; 931 932 /* this is a lazy thing */ 933 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 935 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 936 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 938 if (n_vertices) { 939 Mat B_V,S_VCT; 940 941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 942 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 943 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 944 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 945 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 946 } 947 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 948 } else { /* if there are no constraints, reset work */ 949 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 950 } 951 if (n_vertices && n_R) { 952 Mat A_VRT; 953 PetscScalar *marray; 954 PetscBLASInt B_N,B_one = 1; 955 956 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 957 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 958 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 959 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 960 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 961 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 962 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 963 } 964 965 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 966 for (i=0;i<pcbddc->local_primal_size;i++) { 967 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 968 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 969 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 970 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 971 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 972 } 973 } else { 974 for (i=0;i<pcbddc->local_primal_size;i++) { 975 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 976 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 977 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 978 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 979 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 980 } 981 } 982 /* coarse basis functions */ 983 for (i=0;i<pcbddc->local_primal_size;i++) { 984 PetscScalar *y; 985 986 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 987 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 988 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 989 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 if (i<n_vertices) { 992 y[n_B*i+idx_V_B[i]] = 1.0; 993 } 994 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 995 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 996 997 if (pcbddc->switch_static || pcbddc->dbg_flag) { 998 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 999 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1003 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1004 } 1005 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1006 } 1007 } 1008 /* free memory */ 1009 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1011 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1012 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1013 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1014 ierr = PetscFree(work);CHKERRQ(ierr); 1015 if (n_vertices) { 1016 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1017 } 1018 if (n_constraints) { 1019 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1020 } 1021 /* Checking coarse_sub_mat and coarse basis functios */ 1022 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1023 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1024 if (pcbddc->dbg_flag) { 1025 Mat coarse_sub_mat; 1026 Mat AUXMAT,TM1,TM2,TM3,TM4; 1027 Mat coarse_phi_D,coarse_phi_B; 1028 Mat coarse_psi_D,coarse_psi_B; 1029 Mat A_II,A_BB,A_IB,A_BI; 1030 Mat C_B,CPHI; 1031 IS is_dummy; 1032 Vec mones; 1033 MatType checkmattype=MATSEQAIJ; 1034 PetscReal real_value; 1035 1036 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1037 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1038 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1039 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1040 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1041 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1042 if (unsymmetric_check) { 1043 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1044 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1045 } 1046 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1047 1048 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1049 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1050 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1051 if (unsymmetric_check) { 1052 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1063 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1064 } else { 1065 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1066 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1067 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1070 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1071 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1072 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1073 } 1074 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1077 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1078 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1079 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1081 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1082 1083 /* check constraints */ 1084 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1085 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1086 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1087 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1088 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1089 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1090 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1092 if (unsymmetric_check) { 1093 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1094 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1095 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1096 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1097 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1098 } 1099 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1100 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1101 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1102 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1103 1104 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1105 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1106 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1107 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1108 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1109 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1110 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1111 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1112 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1113 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1114 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1115 if (unsymmetric_check) { 1116 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1117 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1118 } 1119 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1120 } 1121 /* get back data */ 1122 *coarse_submat_vals_n = coarse_submat_vals; 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1128 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1129 { 1130 Mat *work_mat; 1131 IS isrow_s,iscol_s; 1132 PetscBool rsorted,csorted; 1133 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1134 PetscErrorCode ierr; 1135 1136 PetscFunctionBegin; 1137 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1138 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1139 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1140 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1141 1142 if (!rsorted) { 1143 const PetscInt *idxs; 1144 PetscInt *idxs_sorted,i; 1145 1146 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1147 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1148 for (i=0;i<rsize;i++) { 1149 idxs_perm_r[i] = i; 1150 } 1151 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1152 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1153 for (i=0;i<rsize;i++) { 1154 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1155 } 1156 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1157 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1158 } else { 1159 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1160 isrow_s = isrow; 1161 } 1162 1163 if (!csorted) { 1164 if (isrow == iscol) { 1165 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1166 iscol_s = isrow_s; 1167 } else { 1168 const PetscInt *idxs; 1169 PetscInt *idxs_sorted,i; 1170 1171 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1172 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1173 for (i=0;i<csize;i++) { 1174 idxs_perm_c[i] = i; 1175 } 1176 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1177 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1178 for (i=0;i<csize;i++) { 1179 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1180 } 1181 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1182 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1183 } 1184 } else { 1185 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1186 iscol_s = iscol; 1187 } 1188 1189 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1190 1191 if (!rsorted || !csorted) { 1192 Mat new_mat; 1193 IS is_perm_r,is_perm_c; 1194 1195 if (!rsorted) { 1196 PetscInt *idxs_r,i; 1197 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1198 for (i=0;i<rsize;i++) { 1199 idxs_r[idxs_perm_r[i]] = i; 1200 } 1201 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1202 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1203 } else { 1204 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1205 } 1206 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1207 1208 if (!csorted) { 1209 if (isrow_s == iscol_s) { 1210 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1211 is_perm_c = is_perm_r; 1212 } else { 1213 PetscInt *idxs_c,i; 1214 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1215 for (i=0;i<csize;i++) { 1216 idxs_c[idxs_perm_c[i]] = i; 1217 } 1218 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1219 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1220 } 1221 } else { 1222 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1223 } 1224 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1225 1226 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1227 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1228 work_mat[0] = new_mat; 1229 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1230 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1231 } 1232 1233 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1234 *B = work_mat[0]; 1235 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1236 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1237 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1243 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1244 { 1245 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1246 PC_IS* pcis = (PC_IS*)pc->data; 1247 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1248 Mat new_mat; 1249 IS is_local,is_global; 1250 PetscInt local_size; 1251 PetscBool isseqaij; 1252 PetscErrorCode ierr; 1253 1254 PetscFunctionBegin; 1255 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1256 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1257 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1258 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1259 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1260 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1261 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1262 1263 /* check */ 1264 if (pcbddc->dbg_flag) { 1265 Vec x,x_change; 1266 PetscReal error; 1267 1268 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1269 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1270 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1271 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1272 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1273 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1274 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1275 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1276 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1277 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1278 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1279 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1280 ierr = VecDestroy(&x);CHKERRQ(ierr); 1281 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1282 } 1283 1284 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1285 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1286 if (isseqaij) { 1287 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1288 } else { 1289 Mat work_mat; 1290 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1291 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1292 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1293 } 1294 if (matis->A->symmetric_set) { 1295 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1296 #if !defined(PETSC_USE_COMPLEX) 1297 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1298 #endif 1299 } 1300 /* 1301 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1302 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1303 */ 1304 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1310 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1311 { 1312 PC_IS* pcis = (PC_IS*)(pc->data); 1313 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1314 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1315 PetscInt *idx_R_local=NULL; 1316 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1317 PetscInt vbs,bs; 1318 PetscBT bitmask=NULL; 1319 PetscErrorCode ierr; 1320 1321 PetscFunctionBegin; 1322 /* 1323 No need to setup local scatters if 1324 - primal space is unchanged 1325 AND 1326 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1327 AND 1328 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1329 */ 1330 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1331 PetscFunctionReturn(0); 1332 } 1333 /* destroy old objects */ 1334 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1335 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1336 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1337 /* Set Non-overlapping dimensions */ 1338 n_B = pcis->n_B; 1339 n_D = pcis->n - n_B; 1340 n_vertices = pcbddc->n_vertices; 1341 1342 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1343 1344 /* create auxiliary bitmask and allocate workspace */ 1345 if (!sub_schurs->reuse_mumps) { 1346 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1347 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1348 for (i=0;i<n_vertices;i++) { 1349 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1350 } 1351 1352 for (i=0, n_R=0; i<pcis->n; i++) { 1353 if (!PetscBTLookup(bitmask,i)) { 1354 idx_R_local[n_R++] = i; 1355 } 1356 } 1357 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1358 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1359 1360 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1361 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1362 } 1363 1364 /* Block code */ 1365 vbs = 1; 1366 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1367 if (bs>1 && !(n_vertices%bs)) { 1368 PetscBool is_blocked = PETSC_TRUE; 1369 PetscInt *vary; 1370 if (!sub_schurs->reuse_mumps) { 1371 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1372 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1373 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1374 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1375 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1376 for (i=0; i<pcis->n/bs; i++) { 1377 if (vary[i]!=0 && vary[i]!=bs) { 1378 is_blocked = PETSC_FALSE; 1379 break; 1380 } 1381 } 1382 ierr = PetscFree(vary);CHKERRQ(ierr); 1383 } else { 1384 /* Verify directly the R set */ 1385 for (i=0; i<n_R/bs; i++) { 1386 PetscInt j,node=idx_R_local[bs*i]; 1387 for (j=1; j<bs; j++) { 1388 if (node != idx_R_local[bs*i+j]-j) { 1389 is_blocked = PETSC_FALSE; 1390 break; 1391 } 1392 } 1393 } 1394 } 1395 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1396 vbs = bs; 1397 for (i=0;i<n_R/vbs;i++) { 1398 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1399 } 1400 } 1401 } 1402 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1403 if (sub_schurs->reuse_mumps) { 1404 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1405 1406 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1407 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1408 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1409 reuse_mumps->is_R = pcbddc->is_R_local; 1410 } else { 1411 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1412 } 1413 1414 /* print some info if requested */ 1415 if (pcbddc->dbg_flag) { 1416 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1417 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1418 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1420 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1421 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1422 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1423 } 1424 1425 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1426 if (!sub_schurs->reuse_mumps) { 1427 IS is_aux1,is_aux2; 1428 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1429 1430 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1431 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1432 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1433 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1434 for (i=0; i<n_D; i++) { 1435 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1436 } 1437 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1438 for (i=0, j=0; i<n_R; i++) { 1439 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1440 aux_array1[j++] = i; 1441 } 1442 } 1443 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1444 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1445 for (i=0, j=0; i<n_B; i++) { 1446 if (!PetscBTLookup(bitmask,is_indices[i])) { 1447 aux_array2[j++] = i; 1448 } 1449 } 1450 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1451 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1452 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1453 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1454 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1455 1456 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1457 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1458 for (i=0, j=0; i<n_R; i++) { 1459 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1460 aux_array1[j++] = i; 1461 } 1462 } 1463 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1464 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1465 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1466 } 1467 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1468 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1469 } else { 1470 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1471 IS tis; 1472 PetscInt schur_size; 1473 1474 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1475 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1476 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1477 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1478 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1479 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1480 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1481 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1482 } 1483 } 1484 PetscFunctionReturn(0); 1485 } 1486 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1490 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1491 { 1492 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1493 PC_IS *pcis = (PC_IS*)pc->data; 1494 PC pc_temp; 1495 Mat A_RR; 1496 MatReuse reuse; 1497 PetscScalar m_one = -1.0; 1498 PetscReal value; 1499 PetscInt n_D,n_R; 1500 PetscBool use_exact,use_exact_reduced,issbaij; 1501 PetscErrorCode ierr; 1502 /* prefixes stuff */ 1503 char dir_prefix[256],neu_prefix[256],str_level[16]; 1504 size_t len; 1505 1506 PetscFunctionBegin; 1507 1508 /* compute prefixes */ 1509 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1510 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1511 if (!pcbddc->current_level) { 1512 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1513 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1514 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1515 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1516 } else { 1517 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1518 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1519 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1520 len -= 15; /* remove "pc_bddc_coarse_" */ 1521 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1522 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1523 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1524 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1525 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1526 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1527 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1528 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1529 } 1530 1531 /* DIRICHLET PROBLEM */ 1532 if (dirichlet) { 1533 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1534 if (pcbddc->local_mat->symmetric_set) { 1535 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1536 } 1537 /* Matrix for Dirichlet problem is pcis->A_II */ 1538 n_D = pcis->n - pcis->n_B; 1539 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1540 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1541 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1542 /* default */ 1543 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1544 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1545 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1546 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1547 if (issbaij) { 1548 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1549 } else { 1550 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1551 } 1552 /* Allow user's customization */ 1553 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1554 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1555 } 1556 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1557 if (sub_schurs->reuse_mumps) { 1558 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1559 1560 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1561 } 1562 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1563 if (!n_D) { 1564 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1565 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1566 } 1567 /* Set Up KSP for Dirichlet problem of BDDC */ 1568 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1569 /* set ksp_D into pcis data */ 1570 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1571 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1572 pcis->ksp_D = pcbddc->ksp_D; 1573 } 1574 1575 /* NEUMANN PROBLEM */ 1576 A_RR = 0; 1577 if (neumann) { 1578 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1579 PetscInt ibs,mbs; 1580 PetscBool issbaij; 1581 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1582 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1583 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1584 if (pcbddc->ksp_R) { /* already created ksp */ 1585 PetscInt nn_R; 1586 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1587 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1588 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1589 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1590 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1591 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1592 reuse = MAT_INITIAL_MATRIX; 1593 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1594 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1595 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1596 reuse = MAT_INITIAL_MATRIX; 1597 } else { /* safe to reuse the matrix */ 1598 reuse = MAT_REUSE_MATRIX; 1599 } 1600 } 1601 /* last check */ 1602 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1603 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1604 reuse = MAT_INITIAL_MATRIX; 1605 } 1606 } else { /* first time, so we need to create the matrix */ 1607 reuse = MAT_INITIAL_MATRIX; 1608 } 1609 /* extract A_RR */ 1610 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1611 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1612 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1613 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1614 if (matis->A == pcbddc->local_mat) { 1615 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1616 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1617 } else { 1618 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1619 } 1620 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1621 if (matis->A == pcbddc->local_mat) { 1622 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1623 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1624 } else { 1625 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1626 } 1627 } 1628 if (!sub_schurs->reuse_mumps) { 1629 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1630 if (pcbddc->local_mat->symmetric_set) { 1631 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1632 } 1633 } else { 1634 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1635 1636 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1637 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1638 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1639 } 1640 if (!pcbddc->ksp_R) { /* create object if not present */ 1641 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1642 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1643 /* default */ 1644 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1645 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1646 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1647 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1648 if (issbaij) { 1649 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1650 } else { 1651 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1652 } 1653 /* Allow user's customization */ 1654 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1655 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1656 } 1657 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1658 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1659 if (!n_R) { 1660 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1661 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1662 } 1663 /* Reuse MUMPS solver if it is present */ 1664 if (sub_schurs->reuse_mumps) { 1665 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1666 1667 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1668 } 1669 /* Set Up KSP for Neumann problem of BDDC */ 1670 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1671 } 1672 /* free Neumann problem's matrix */ 1673 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1674 1675 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1676 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1677 if (pcbddc->dbg_flag) { 1678 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1679 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1680 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1681 } 1682 if (dirichlet) { /* Dirichlet */ 1683 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1684 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1685 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1686 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1687 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1688 /* need to be adapted? */ 1689 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1690 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1691 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1692 /* print info */ 1693 if (pcbddc->dbg_flag) { 1694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1696 } 1697 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1698 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1699 } 1700 } 1701 if (neumann) { /* Neumann */ 1702 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1703 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1704 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1705 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1706 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1707 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1708 /* need to be adapted? */ 1709 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1710 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1711 /* print info */ 1712 if (pcbddc->dbg_flag) { 1713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1714 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1715 } 1716 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1717 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1718 } 1719 } 1720 } 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1726 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1727 { 1728 PetscErrorCode ierr; 1729 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1730 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1731 1732 PetscFunctionBegin; 1733 if (!sub_schurs->reuse_mumps) { 1734 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1735 } 1736 if (!pcbddc->switch_static) { 1737 if (applytranspose && pcbddc->local_auxmat1) { 1738 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1739 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1740 } 1741 if (!sub_schurs->reuse_mumps) { 1742 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1743 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1744 } else { 1745 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1746 1747 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1748 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1749 } 1750 } else { 1751 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1752 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1753 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1754 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1755 if (applytranspose && pcbddc->local_auxmat1) { 1756 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1757 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1758 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1759 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1760 } 1761 } 1762 if (!sub_schurs->reuse_mumps) { 1763 if (applytranspose) { 1764 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1765 } else { 1766 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1767 } 1768 #if defined(PETSC_HAVE_MUMPS) 1769 } else { 1770 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1771 1772 if (applytranspose) { 1773 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1774 } else { 1775 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1776 } 1777 #endif 1778 } 1779 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1780 if (!pcbddc->switch_static) { 1781 if (!sub_schurs->reuse_mumps) { 1782 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1783 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 } else { 1785 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1786 1787 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1788 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1789 } 1790 if (!applytranspose && pcbddc->local_auxmat1) { 1791 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1792 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1793 } 1794 } else { 1795 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1796 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1797 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1799 if (!applytranspose && pcbddc->local_auxmat1) { 1800 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1801 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1802 } 1803 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1804 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1805 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1806 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1807 } 1808 PetscFunctionReturn(0); 1809 } 1810 1811 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1814 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1815 { 1816 PetscErrorCode ierr; 1817 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1818 PC_IS* pcis = (PC_IS*) (pc->data); 1819 const PetscScalar zero = 0.0; 1820 1821 PetscFunctionBegin; 1822 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1823 if (applytranspose) { 1824 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1825 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1826 } else { 1827 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1828 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1829 } 1830 /* start communications from local primal nodes to rhs of coarse solver */ 1831 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1832 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1833 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1834 1835 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1836 /* TODO remove null space when doing multilevel */ 1837 if (pcbddc->coarse_ksp) { 1838 Vec rhs,sol; 1839 1840 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1841 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1842 if (applytranspose) { 1843 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1844 } else { 1845 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1846 } 1847 } 1848 1849 /* Local solution on R nodes */ 1850 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1851 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1852 } 1853 1854 /* communications from coarse sol to local primal nodes */ 1855 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1856 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1857 1858 /* Sum contributions from two levels */ 1859 if (applytranspose) { 1860 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1861 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1862 } else { 1863 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1864 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1871 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1872 { 1873 PetscErrorCode ierr; 1874 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1875 PetscScalar *array; 1876 Vec from,to; 1877 1878 PetscFunctionBegin; 1879 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1880 from = pcbddc->coarse_vec; 1881 to = pcbddc->vec1_P; 1882 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1883 Vec tvec; 1884 1885 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1886 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1887 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1888 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1889 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1890 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1891 } 1892 } else { /* from local to global -> put data in coarse right hand side */ 1893 from = pcbddc->vec1_P; 1894 to = pcbddc->coarse_vec; 1895 } 1896 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1902 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1903 { 1904 PetscErrorCode ierr; 1905 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1906 PetscScalar *array; 1907 Vec from,to; 1908 1909 PetscFunctionBegin; 1910 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1911 from = pcbddc->coarse_vec; 1912 to = pcbddc->vec1_P; 1913 } else { /* from local to global -> put data in coarse right hand side */ 1914 from = pcbddc->vec1_P; 1915 to = pcbddc->coarse_vec; 1916 } 1917 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1918 if (smode == SCATTER_FORWARD) { 1919 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1920 Vec tvec; 1921 1922 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1923 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1924 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1925 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1926 } 1927 } else { 1928 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1929 ierr = VecResetArray(from);CHKERRQ(ierr); 1930 } 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 /* uncomment for testing purposes */ 1936 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1939 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1940 { 1941 PetscErrorCode ierr; 1942 PC_IS* pcis = (PC_IS*)(pc->data); 1943 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1944 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1945 /* one and zero */ 1946 PetscScalar one=1.0,zero=0.0; 1947 /* space to store constraints and their local indices */ 1948 PetscScalar *constraints_data; 1949 PetscInt *constraints_idxs,*constraints_idxs_B; 1950 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1951 PetscInt *constraints_n; 1952 /* iterators */ 1953 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1954 /* BLAS integers */ 1955 PetscBLASInt lwork,lierr; 1956 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1957 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1958 /* reuse */ 1959 PetscInt olocal_primal_size,olocal_primal_size_cc; 1960 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1961 /* change of basis */ 1962 PetscBool qr_needed; 1963 PetscBT change_basis,qr_needed_idx; 1964 /* auxiliary stuff */ 1965 PetscInt *nnz,*is_indices; 1966 PetscInt ncc; 1967 /* some quantities */ 1968 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1969 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1970 1971 PetscFunctionBegin; 1972 /* Destroy Mat objects computed previously */ 1973 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1974 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1975 /* save info on constraints from previous setup (if any) */ 1976 olocal_primal_size = pcbddc->local_primal_size; 1977 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1978 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1979 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1980 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1981 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1982 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1983 1984 /* print some info */ 1985 if (pcbddc->dbg_flag) { 1986 IS vertices; 1987 PetscInt nv,nedges,nfaces; 1988 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1989 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1990 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1991 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1996 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1997 } 1998 1999 if (!pcbddc->adaptive_selection) { 2000 IS ISForVertices,*ISForFaces,*ISForEdges; 2001 MatNullSpace nearnullsp; 2002 const Vec *nearnullvecs; 2003 Vec *localnearnullsp; 2004 PetscScalar *array; 2005 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2006 PetscBool nnsp_has_cnst; 2007 /* LAPACK working arrays for SVD or POD */ 2008 PetscBool skip_lapack,boolforchange; 2009 PetscScalar *work; 2010 PetscReal *singular_vals; 2011 #if defined(PETSC_USE_COMPLEX) 2012 PetscReal *rwork; 2013 #endif 2014 #if defined(PETSC_MISSING_LAPACK_GESVD) 2015 PetscScalar *temp_basis,*correlation_mat; 2016 #else 2017 PetscBLASInt dummy_int=1; 2018 PetscScalar dummy_scalar=1.; 2019 #endif 2020 2021 /* Get index sets for faces, edges and vertices from graph */ 2022 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2023 /* free unneeded index sets */ 2024 if (!pcbddc->use_vertices) { 2025 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2026 } 2027 if (!pcbddc->use_edges) { 2028 for (i=0;i<n_ISForEdges;i++) { 2029 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2030 } 2031 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2032 n_ISForEdges = 0; 2033 } 2034 if (!pcbddc->use_faces) { 2035 for (i=0;i<n_ISForFaces;i++) { 2036 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2037 } 2038 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2039 n_ISForFaces = 0; 2040 } 2041 2042 #if defined(PETSC_USE_DEBUG) 2043 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2044 Also use_change_of_basis should be consistent among processors */ 2045 if (pcbddc->NullSpace) { 2046 PetscBool tbool[2],gbool[2]; 2047 2048 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2049 pcbddc->use_change_of_basis = PETSC_TRUE; 2050 if (!ISForEdges) { 2051 pcbddc->use_change_on_faces = PETSC_TRUE; 2052 } 2053 } 2054 tbool[0] = pcbddc->use_change_of_basis; 2055 tbool[1] = pcbddc->use_change_on_faces; 2056 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2057 pcbddc->use_change_of_basis = gbool[0]; 2058 pcbddc->use_change_on_faces = gbool[1]; 2059 } 2060 #endif 2061 2062 /* check if near null space is attached to global mat */ 2063 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2064 if (nearnullsp) { 2065 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2066 /* remove any stored info */ 2067 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2068 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2069 /* store information for BDDC solver reuse */ 2070 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2071 pcbddc->onearnullspace = nearnullsp; 2072 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2073 for (i=0;i<nnsp_size;i++) { 2074 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2075 } 2076 } else { /* if near null space is not provided BDDC uses constants by default */ 2077 nnsp_size = 0; 2078 nnsp_has_cnst = PETSC_TRUE; 2079 } 2080 /* get max number of constraints on a single cc */ 2081 max_constraints = nnsp_size; 2082 if (nnsp_has_cnst) max_constraints++; 2083 2084 /* 2085 Evaluate maximum storage size needed by the procedure 2086 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2087 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2088 There can be multiple constraints per connected component 2089 */ 2090 n_vertices = 0; 2091 if (ISForVertices) { 2092 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2093 } 2094 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2095 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2096 2097 total_counts = n_ISForFaces+n_ISForEdges; 2098 total_counts *= max_constraints; 2099 total_counts += n_vertices; 2100 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2101 2102 total_counts = 0; 2103 max_size_of_constraint = 0; 2104 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2105 IS used_is; 2106 if (i<n_ISForEdges) { 2107 used_is = ISForEdges[i]; 2108 } else { 2109 used_is = ISForFaces[i-n_ISForEdges]; 2110 } 2111 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2112 total_counts += j; 2113 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2114 } 2115 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2116 2117 /* get local part of global near null space vectors */ 2118 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2119 for (k=0;k<nnsp_size;k++) { 2120 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2121 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2122 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2123 } 2124 2125 /* whether or not to skip lapack calls */ 2126 skip_lapack = PETSC_TRUE; 2127 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2128 2129 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2130 if (!skip_lapack) { 2131 PetscScalar temp_work; 2132 2133 #if defined(PETSC_MISSING_LAPACK_GESVD) 2134 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2135 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2136 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2137 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2138 #if defined(PETSC_USE_COMPLEX) 2139 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2140 #endif 2141 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2142 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2143 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2144 lwork = -1; 2145 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2146 #if !defined(PETSC_USE_COMPLEX) 2147 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2148 #else 2149 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2150 #endif 2151 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2152 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2153 #else /* on missing GESVD */ 2154 /* SVD */ 2155 PetscInt max_n,min_n; 2156 max_n = max_size_of_constraint; 2157 min_n = max_constraints; 2158 if (max_size_of_constraint < max_constraints) { 2159 min_n = max_size_of_constraint; 2160 max_n = max_constraints; 2161 } 2162 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2163 #if defined(PETSC_USE_COMPLEX) 2164 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2165 #endif 2166 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2167 lwork = -1; 2168 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2169 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2170 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2171 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2172 #if !defined(PETSC_USE_COMPLEX) 2173 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2174 #else 2175 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2176 #endif 2177 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2178 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2179 #endif /* on missing GESVD */ 2180 /* Allocate optimal workspace */ 2181 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2182 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2183 } 2184 /* Now we can loop on constraining sets */ 2185 total_counts = 0; 2186 constraints_idxs_ptr[0] = 0; 2187 constraints_data_ptr[0] = 0; 2188 /* vertices */ 2189 if (n_vertices) { 2190 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2191 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2192 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2193 for (i=0;i<n_vertices;i++) { 2194 constraints_n[total_counts] = 1; 2195 constraints_data[total_counts] = 1.0; 2196 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2197 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2198 total_counts++; 2199 } 2200 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2201 PetscBool used_vertex; 2202 for (i=0;i<n_vertices;i++) { 2203 used_vertex = PETSC_FALSE; 2204 k = 0; 2205 while (!used_vertex && k<nnsp_size) { 2206 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2207 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2208 constraints_n[total_counts] = 1; 2209 constraints_idxs[total_counts] = is_indices[i]; 2210 constraints_data[total_counts] = 1.0; 2211 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2212 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2213 total_counts++; 2214 used_vertex = PETSC_TRUE; 2215 } 2216 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2217 k++; 2218 } 2219 } 2220 } 2221 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2222 n_vertices = total_counts; 2223 } 2224 2225 /* edges and faces */ 2226 total_counts_cc = total_counts; 2227 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2228 IS used_is; 2229 PetscBool idxs_copied = PETSC_FALSE; 2230 2231 if (ncc<n_ISForEdges) { 2232 used_is = ISForEdges[ncc]; 2233 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2234 } else { 2235 used_is = ISForFaces[ncc-n_ISForEdges]; 2236 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2237 } 2238 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2239 2240 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2241 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2242 /* change of basis should not be performed on local periodic nodes */ 2243 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2244 if (nnsp_has_cnst) { 2245 PetscScalar quad_value; 2246 2247 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2248 idxs_copied = PETSC_TRUE; 2249 2250 if (!pcbddc->use_nnsp_true) { 2251 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2252 } else { 2253 quad_value = 1.0; 2254 } 2255 for (j=0;j<size_of_constraint;j++) { 2256 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2257 } 2258 temp_constraints++; 2259 total_counts++; 2260 } 2261 for (k=0;k<nnsp_size;k++) { 2262 PetscReal real_value; 2263 PetscScalar *ptr_to_data; 2264 2265 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2266 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2267 for (j=0;j<size_of_constraint;j++) { 2268 ptr_to_data[j] = array[is_indices[j]]; 2269 } 2270 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2271 /* check if array is null on the connected component */ 2272 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2273 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2274 if (real_value > 0.0) { /* keep indices and values */ 2275 temp_constraints++; 2276 total_counts++; 2277 if (!idxs_copied) { 2278 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2279 idxs_copied = PETSC_TRUE; 2280 } 2281 } 2282 } 2283 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2284 valid_constraints = temp_constraints; 2285 if (!pcbddc->use_nnsp_true && temp_constraints) { 2286 if (temp_constraints == 1) { /* just normalize the constraint */ 2287 PetscScalar norm,*ptr_to_data; 2288 2289 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2290 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2291 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2292 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2293 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2294 } else { /* perform SVD */ 2295 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2296 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2297 2298 #if defined(PETSC_MISSING_LAPACK_GESVD) 2299 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2300 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2301 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2302 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2303 from that computed using LAPACKgesvd 2304 -> This is due to a different computation of eigenvectors in LAPACKheev 2305 -> The quality of the POD-computed basis will be the same */ 2306 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2307 /* Store upper triangular part of correlation matrix */ 2308 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2309 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2310 for (j=0;j<temp_constraints;j++) { 2311 for (k=0;k<j+1;k++) { 2312 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2313 } 2314 } 2315 /* compute eigenvalues and eigenvectors of correlation matrix */ 2316 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2317 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2318 #if !defined(PETSC_USE_COMPLEX) 2319 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2320 #else 2321 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2322 #endif 2323 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2324 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2325 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2326 j = 0; 2327 while (j < temp_constraints && singular_vals[j] < tol) j++; 2328 total_counts = total_counts-j; 2329 valid_constraints = temp_constraints-j; 2330 /* scale and copy POD basis into used quadrature memory */ 2331 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2332 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2333 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2334 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2335 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2336 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2337 if (j<temp_constraints) { 2338 PetscInt ii; 2339 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2340 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2341 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2342 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2343 for (k=0;k<temp_constraints-j;k++) { 2344 for (ii=0;ii<size_of_constraint;ii++) { 2345 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2346 } 2347 } 2348 } 2349 #else /* on missing GESVD */ 2350 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2351 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2352 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2353 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2354 #if !defined(PETSC_USE_COMPLEX) 2355 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2356 #else 2357 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2358 #endif 2359 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2360 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2361 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2362 k = temp_constraints; 2363 if (k > size_of_constraint) k = size_of_constraint; 2364 j = 0; 2365 while (j < k && singular_vals[k-j-1] < tol) j++; 2366 valid_constraints = k-j; 2367 total_counts = total_counts-temp_constraints+valid_constraints; 2368 #endif /* on missing GESVD */ 2369 } 2370 } 2371 /* update pointers information */ 2372 if (valid_constraints) { 2373 constraints_n[total_counts_cc] = valid_constraints; 2374 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2375 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2376 /* set change_of_basis flag */ 2377 if (boolforchange) { 2378 PetscBTSet(change_basis,total_counts_cc); 2379 } 2380 total_counts_cc++; 2381 } 2382 } 2383 /* free workspace */ 2384 if (!skip_lapack) { 2385 ierr = PetscFree(work);CHKERRQ(ierr); 2386 #if defined(PETSC_USE_COMPLEX) 2387 ierr = PetscFree(rwork);CHKERRQ(ierr); 2388 #endif 2389 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2390 #if defined(PETSC_MISSING_LAPACK_GESVD) 2391 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2392 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2393 #endif 2394 } 2395 for (k=0;k<nnsp_size;k++) { 2396 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2397 } 2398 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2399 /* free index sets of faces, edges and vertices */ 2400 for (i=0;i<n_ISForFaces;i++) { 2401 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2402 } 2403 if (n_ISForFaces) { 2404 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2405 } 2406 for (i=0;i<n_ISForEdges;i++) { 2407 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2408 } 2409 if (n_ISForEdges) { 2410 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2411 } 2412 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2413 } else { 2414 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2415 2416 total_counts = 0; 2417 n_vertices = 0; 2418 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2419 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2420 } 2421 max_constraints = 0; 2422 total_counts_cc = 0; 2423 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2424 total_counts += pcbddc->adaptive_constraints_n[i]; 2425 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2426 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2427 } 2428 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2429 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2430 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2431 constraints_data = pcbddc->adaptive_constraints_data; 2432 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2433 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2434 total_counts_cc = 0; 2435 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2436 if (pcbddc->adaptive_constraints_n[i]) { 2437 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2438 } 2439 } 2440 #if 0 2441 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2442 for (i=0;i<total_counts_cc;i++) { 2443 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2444 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2445 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2446 printf(" %d",constraints_idxs[j]); 2447 } 2448 printf("\n"); 2449 printf("number of cc: %d\n",constraints_n[i]); 2450 } 2451 for (i=0;i<n_vertices;i++) { 2452 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2453 } 2454 for (i=0;i<sub_schurs->n_subs;i++) { 2455 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2456 } 2457 #endif 2458 2459 max_size_of_constraint = 0; 2460 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2461 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2462 /* Change of basis */ 2463 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2464 if (pcbddc->use_change_of_basis) { 2465 for (i=0;i<sub_schurs->n_subs;i++) { 2466 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2467 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2468 } 2469 } 2470 } 2471 } 2472 pcbddc->local_primal_size = total_counts; 2473 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2474 2475 /* map constraints_idxs in boundary numbering */ 2476 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2477 if (i != constraints_idxs_ptr[total_counts_cc]) { 2478 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2479 } 2480 2481 /* Create constraint matrix */ 2482 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2483 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2484 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2485 2486 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2487 /* determine if a QR strategy is needed for change of basis */ 2488 qr_needed = PETSC_FALSE; 2489 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2490 total_primal_vertices=0; 2491 pcbddc->local_primal_size_cc = 0; 2492 for (i=0;i<total_counts_cc;i++) { 2493 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2494 if (size_of_constraint == 1) { 2495 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2496 pcbddc->local_primal_size_cc += 1; 2497 } else if (PetscBTLookup(change_basis,i)) { 2498 for (k=0;k<constraints_n[i];k++) { 2499 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2500 } 2501 pcbddc->local_primal_size_cc += constraints_n[i]; 2502 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2503 PetscBTSet(qr_needed_idx,i); 2504 qr_needed = PETSC_TRUE; 2505 } 2506 } else { 2507 pcbddc->local_primal_size_cc += 1; 2508 } 2509 } 2510 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2511 pcbddc->n_vertices = total_primal_vertices; 2512 /* permute indices in order to have a sorted set of vertices */ 2513 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2514 2515 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2516 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2517 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2518 2519 /* nonzero structure of constraint matrix */ 2520 /* and get reference dof for local constraints */ 2521 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2522 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2523 2524 j = total_primal_vertices; 2525 total_counts = total_primal_vertices; 2526 cum = total_primal_vertices; 2527 for (i=n_vertices;i<total_counts_cc;i++) { 2528 if (!PetscBTLookup(change_basis,i)) { 2529 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2530 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2531 cum++; 2532 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2533 for (k=0;k<constraints_n[i];k++) { 2534 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2535 nnz[j+k] = size_of_constraint; 2536 } 2537 j += constraints_n[i]; 2538 } 2539 } 2540 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2541 ierr = PetscFree(nnz);CHKERRQ(ierr); 2542 2543 /* set values in constraint matrix */ 2544 for (i=0;i<total_primal_vertices;i++) { 2545 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2546 } 2547 total_counts = total_primal_vertices; 2548 for (i=n_vertices;i<total_counts_cc;i++) { 2549 if (!PetscBTLookup(change_basis,i)) { 2550 PetscInt *cols; 2551 2552 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2553 cols = constraints_idxs+constraints_idxs_ptr[i]; 2554 for (k=0;k<constraints_n[i];k++) { 2555 PetscInt row = total_counts+k; 2556 PetscScalar *vals; 2557 2558 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2559 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2560 } 2561 total_counts += constraints_n[i]; 2562 } 2563 } 2564 /* assembling */ 2565 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2566 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2567 2568 /* 2569 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2570 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2571 */ 2572 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2573 if (pcbddc->use_change_of_basis) { 2574 /* dual and primal dofs on a single cc */ 2575 PetscInt dual_dofs,primal_dofs; 2576 /* working stuff for GEQRF */ 2577 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2578 PetscBLASInt lqr_work; 2579 /* working stuff for UNGQR */ 2580 PetscScalar *gqr_work,lgqr_work_t; 2581 PetscBLASInt lgqr_work; 2582 /* working stuff for TRTRS */ 2583 PetscScalar *trs_rhs; 2584 PetscBLASInt Blas_NRHS; 2585 /* pointers for values insertion into change of basis matrix */ 2586 PetscInt *start_rows,*start_cols; 2587 PetscScalar *start_vals; 2588 /* working stuff for values insertion */ 2589 PetscBT is_primal; 2590 PetscInt *aux_primal_numbering_B; 2591 /* matrix sizes */ 2592 PetscInt global_size,local_size; 2593 /* temporary change of basis */ 2594 Mat localChangeOfBasisMatrix; 2595 /* extra space for debugging */ 2596 PetscScalar *dbg_work; 2597 2598 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2599 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2600 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2601 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2602 /* nonzeros for local mat */ 2603 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2604 for (i=0;i<pcis->n;i++) nnz[i]=1; 2605 for (i=n_vertices;i<total_counts_cc;i++) { 2606 if (PetscBTLookup(change_basis,i)) { 2607 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2608 if (PetscBTLookup(qr_needed_idx,i)) { 2609 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2610 } else { 2611 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2612 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2613 } 2614 } 2615 } 2616 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2617 ierr = PetscFree(nnz);CHKERRQ(ierr); 2618 /* Set initial identity in the matrix */ 2619 for (i=0;i<pcis->n;i++) { 2620 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2621 } 2622 2623 if (pcbddc->dbg_flag) { 2624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2626 } 2627 2628 2629 /* Now we loop on the constraints which need a change of basis */ 2630 /* 2631 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2632 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2633 2634 Basic blocks of change of basis matrix T computed by 2635 2636 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2637 2638 | 1 0 ... 0 s_1/S | 2639 | 0 1 ... 0 s_2/S | 2640 | ... | 2641 | 0 ... 1 s_{n-1}/S | 2642 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2643 2644 with S = \sum_{i=1}^n s_i^2 2645 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2646 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2647 2648 - QR decomposition of constraints otherwise 2649 */ 2650 if (qr_needed) { 2651 /* space to store Q */ 2652 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2653 /* first we issue queries for optimal work */ 2654 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2655 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2656 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2657 lqr_work = -1; 2658 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2659 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2660 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2661 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2662 lgqr_work = -1; 2663 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2664 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2665 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2666 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2667 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2668 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2669 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2670 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2671 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2672 /* array to store scaling factors for reflectors */ 2673 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2674 /* array to store rhs and solution of triangular solver */ 2675 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2676 /* allocating workspace for check */ 2677 if (pcbddc->dbg_flag) { 2678 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2679 } 2680 } 2681 /* array to store whether a node is primal or not */ 2682 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2683 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2684 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2685 if (i != total_primal_vertices) { 2686 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2687 } 2688 for (i=0;i<total_primal_vertices;i++) { 2689 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2690 } 2691 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2692 2693 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2694 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2695 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2696 if (PetscBTLookup(change_basis,total_counts)) { 2697 /* get constraint info */ 2698 primal_dofs = constraints_n[total_counts]; 2699 dual_dofs = size_of_constraint-primal_dofs; 2700 2701 if (pcbddc->dbg_flag) { 2702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2703 } 2704 2705 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2706 2707 /* copy quadrature constraints for change of basis check */ 2708 if (pcbddc->dbg_flag) { 2709 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2710 } 2711 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2712 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2713 2714 /* compute QR decomposition of constraints */ 2715 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2716 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2717 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2718 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2719 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2720 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2721 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2722 2723 /* explictly compute R^-T */ 2724 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2725 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2726 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2727 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2728 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2729 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2730 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2731 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2732 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2733 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2734 2735 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2736 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2737 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2738 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2739 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2740 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2741 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2742 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2743 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2744 2745 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2746 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2747 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2748 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2749 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2750 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2751 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2752 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2753 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2754 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2755 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2756 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2757 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2758 2759 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2760 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2761 /* insert cols for primal dofs */ 2762 for (j=0;j<primal_dofs;j++) { 2763 start_vals = &qr_basis[j*size_of_constraint]; 2764 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2765 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2766 } 2767 /* insert cols for dual dofs */ 2768 for (j=0,k=0;j<dual_dofs;k++) { 2769 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2770 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2771 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2772 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2773 j++; 2774 } 2775 } 2776 2777 /* check change of basis */ 2778 if (pcbddc->dbg_flag) { 2779 PetscInt ii,jj; 2780 PetscBool valid_qr=PETSC_TRUE; 2781 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2782 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2783 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2784 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2785 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2786 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2787 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2788 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2789 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2790 for (jj=0;jj<size_of_constraint;jj++) { 2791 for (ii=0;ii<primal_dofs;ii++) { 2792 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2793 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2794 } 2795 } 2796 if (!valid_qr) { 2797 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2798 for (jj=0;jj<size_of_constraint;jj++) { 2799 for (ii=0;ii<primal_dofs;ii++) { 2800 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2801 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2802 } 2803 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2804 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2805 } 2806 } 2807 } 2808 } else { 2809 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2810 } 2811 } 2812 } else { /* simple transformation block */ 2813 PetscInt row,col; 2814 PetscScalar val,norm; 2815 2816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2817 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2818 for (j=0;j<size_of_constraint;j++) { 2819 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2820 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2821 if (!PetscBTLookup(is_primal,row_B)) { 2822 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2823 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2824 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2825 } else { 2826 for (k=0;k<size_of_constraint;k++) { 2827 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2828 if (row != col) { 2829 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2830 } else { 2831 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2832 } 2833 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2834 } 2835 } 2836 } 2837 if (pcbddc->dbg_flag) { 2838 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2839 } 2840 } 2841 } else { 2842 if (pcbddc->dbg_flag) { 2843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2844 } 2845 } 2846 } 2847 2848 /* free workspace */ 2849 if (qr_needed) { 2850 if (pcbddc->dbg_flag) { 2851 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2852 } 2853 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2854 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2855 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2856 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2857 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2858 } 2859 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2860 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 2863 /* assembling of global change of variable */ 2864 { 2865 Mat tmat; 2866 PetscInt bs; 2867 2868 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2869 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2870 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2871 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2872 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2873 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2874 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2875 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2876 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2877 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2878 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2879 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2880 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2881 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2882 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2883 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2884 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2885 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2886 } 2887 /* check */ 2888 if (pcbddc->dbg_flag) { 2889 PetscReal error; 2890 Vec x,x_change; 2891 2892 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2893 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2894 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2895 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2896 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2897 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2898 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2899 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2900 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2901 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2902 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2903 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2904 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2905 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2906 ierr = VecDestroy(&x);CHKERRQ(ierr); 2907 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2908 } 2909 2910 /* adapt sub_schurs computed (if any) */ 2911 if (pcbddc->use_deluxe_scaling) { 2912 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2913 if (sub_schurs->S_Ej_all) { 2914 Mat S_new,tmat; 2915 IS is_all_N; 2916 2917 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2918 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2919 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2920 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2921 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2922 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2923 sub_schurs->S_Ej_all = S_new; 2924 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2925 if (sub_schurs->sum_S_Ej_all) { 2926 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2927 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2928 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2929 sub_schurs->sum_S_Ej_all = S_new; 2930 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2931 } 2932 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2933 } 2934 } 2935 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2936 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2937 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2938 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2939 } 2940 2941 /* set up change of basis context */ 2942 if (pcbddc->ChangeOfBasisMatrix) { 2943 PCBDDCChange_ctx change_ctx; 2944 2945 if (!pcbddc->new_global_mat) { 2946 PetscInt global_size,local_size; 2947 2948 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2949 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2950 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2951 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2952 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2953 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2954 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2955 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2956 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2957 } else { 2958 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2959 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2960 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2961 } 2962 if (!pcbddc->user_ChangeOfBasisMatrix) { 2963 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2964 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2965 } else { 2966 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2967 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2968 } 2969 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2970 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2971 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2973 } 2974 2975 /* check if a new primal space has been introduced */ 2976 pcbddc->new_primal_space_local = PETSC_TRUE; 2977 if (olocal_primal_size == pcbddc->local_primal_size) { 2978 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2979 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2980 if (!pcbddc->new_primal_space_local) { 2981 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2982 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2983 } 2984 } 2985 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2986 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2987 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2988 2989 /* flush dbg viewer */ 2990 if (pcbddc->dbg_flag) { 2991 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2992 } 2993 2994 /* free workspace */ 2995 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2996 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2997 if (!pcbddc->adaptive_selection) { 2998 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2999 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3000 } else { 3001 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3002 pcbddc->adaptive_constraints_idxs_ptr, 3003 pcbddc->adaptive_constraints_data_ptr, 3004 pcbddc->adaptive_constraints_idxs, 3005 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3006 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3007 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3008 } 3009 PetscFunctionReturn(0); 3010 } 3011 3012 #undef __FUNCT__ 3013 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3014 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3015 { 3016 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3017 PC_IS *pcis = (PC_IS*)pc->data; 3018 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3019 PetscInt ierr,i,vertex_size,N; 3020 PetscViewer viewer=pcbddc->dbg_viewer; 3021 3022 PetscFunctionBegin; 3023 /* Reset previously computed graph */ 3024 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3025 /* Init local Graph struct */ 3026 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3027 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3028 3029 /* Check validity of the csr graph passed in by the user */ 3030 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3031 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3032 } 3033 3034 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3035 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3036 PetscInt *xadj,*adjncy; 3037 PetscInt nvtxs; 3038 PetscBool flg_row=PETSC_FALSE; 3039 3040 if (pcbddc->use_local_adj) { 3041 3042 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3043 if (flg_row) { 3044 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3045 pcbddc->computed_rowadj = PETSC_TRUE; 3046 } 3047 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3048 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3049 IS is_dummy; 3050 ISLocalToGlobalMapping l2gmap_dummy; 3051 PetscInt j,sum; 3052 PetscInt *cxadj,*cadjncy; 3053 const PetscInt *idxs; 3054 PCBDDCGraph graph; 3055 PetscBT is_on_boundary; 3056 3057 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3058 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3059 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3060 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3061 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3062 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3063 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3064 if (flg_row) { 3065 graph->xadj = xadj; 3066 graph->adjncy = adjncy; 3067 } 3068 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3069 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3070 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3071 3072 if (pcbddc->dbg_flag) { 3073 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3074 for (i=0;i<graph->ncc;i++) { 3075 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3076 } 3077 } 3078 3079 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3080 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3081 for (i=0;i<pcis->n_B;i++) { 3082 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3083 } 3084 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3085 3086 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3087 sum = 0; 3088 for (i=0;i<graph->ncc;i++) { 3089 PetscInt sizecc = 0; 3090 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3091 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3092 sizecc++; 3093 } 3094 } 3095 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3096 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3097 cxadj[graph->queue[j]] = sizecc; 3098 } 3099 } 3100 sum += sizecc*sizecc; 3101 } 3102 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3103 sum = 0; 3104 for (i=0;i<pcis->n;i++) { 3105 PetscInt temp = cxadj[i]; 3106 cxadj[i] = sum; 3107 sum += temp; 3108 } 3109 cxadj[pcis->n] = sum; 3110 for (i=0;i<graph->ncc;i++) { 3111 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3112 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3113 PetscInt k,sizecc = 0; 3114 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3115 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3116 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3117 sizecc++; 3118 } 3119 } 3120 } 3121 } 3122 } 3123 if (sum) { 3124 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3125 } else { 3126 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3127 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3128 } 3129 graph->xadj = 0; 3130 graph->adjncy = 0; 3131 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3132 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3133 } 3134 } 3135 if (pcbddc->dbg_flag) { 3136 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3137 } 3138 3139 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3140 vertex_size = 1; 3141 if (pcbddc->user_provided_isfordofs) { 3142 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3143 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3144 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3145 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3146 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3147 } 3148 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3149 pcbddc->n_ISForDofs = 0; 3150 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3151 } 3152 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3153 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3154 } else { 3155 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3156 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3157 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3158 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3159 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3160 } 3161 } 3162 } 3163 3164 /* Setup of Graph */ 3165 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3166 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3167 } 3168 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3169 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3170 } 3171 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3172 3173 /* Graph's connected components analysis */ 3174 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3175 3176 /* print some info to stdout */ 3177 if (pcbddc->dbg_flag) { 3178 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3179 } 3180 3181 /* mark topography has done */ 3182 pcbddc->recompute_topography = PETSC_FALSE; 3183 PetscFunctionReturn(0); 3184 } 3185 3186 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3187 #undef __FUNCT__ 3188 #define __FUNCT__ "PCBDDCSubsetNumbering" 3189 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3190 { 3191 PetscSF sf; 3192 PetscLayout map; 3193 const PetscInt *idxs; 3194 PetscInt *leaf_data,*root_data,*gidxs; 3195 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3196 PetscInt n_n,nlocals,start,first_index; 3197 PetscMPIInt commsize; 3198 PetscBool first_found; 3199 PetscErrorCode ierr; 3200 3201 PetscFunctionBegin; 3202 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3203 if (subset_mult) { 3204 PetscCheckSameComm(subset,1,subset_mult,2); 3205 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3206 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3207 } 3208 /* create workspace layout for computing global indices of subset */ 3209 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3210 lbounds[0] = lbounds[1] = 0; 3211 for (i=0;i<n;i++) { 3212 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3213 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3214 } 3215 lbounds[0] = -lbounds[0]; 3216 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3217 gbounds[0] = -gbounds[0]; 3218 N = gbounds[1] - gbounds[0] + 1; 3219 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3220 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3221 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3222 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3223 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3224 3225 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3226 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3227 if (subset_mult) { 3228 const PetscInt* idxs_mult; 3229 3230 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3231 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3232 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3233 } else { 3234 for (i=0;i<n;i++) leaf_data[i] = 1; 3235 } 3236 /* local size of new subset */ 3237 n_n = 0; 3238 for (i=0;i<n;i++) n_n += leaf_data[i]; 3239 3240 /* global indexes in layout */ 3241 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3242 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3243 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3244 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3245 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3246 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3247 3248 /* reduce from leaves to roots */ 3249 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3250 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3251 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3252 3253 /* count indexes in local part of layout */ 3254 nlocals = 0; 3255 first_index = -1; 3256 first_found = PETSC_FALSE; 3257 for (i=0;i<Nl;i++) { 3258 if (!first_found && root_data[i]) { 3259 first_found = PETSC_TRUE; 3260 first_index = i; 3261 } 3262 nlocals += root_data[i]; 3263 } 3264 3265 /* cumulative of number of indexes and size of subset without holes */ 3266 #if defined(PETSC_HAVE_MPI_EXSCAN) 3267 start = 0; 3268 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3269 #else 3270 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3271 start = start-nlocals; 3272 #endif 3273 3274 if (N_n) { /* compute total size of new subset if requested */ 3275 *N_n = start + nlocals; 3276 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3277 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3278 } 3279 3280 /* adapt root data with cumulative */ 3281 if (first_found) { 3282 PetscInt old_index; 3283 3284 root_data[first_index] += start; 3285 old_index = first_index; 3286 for (i=first_index+1;i<Nl;i++) { 3287 if (root_data[i]) { 3288 root_data[i] += root_data[old_index]; 3289 old_index = i; 3290 } 3291 } 3292 } 3293 3294 /* from roots to leaves */ 3295 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3296 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3297 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3298 3299 /* create new IS with global indexes without holes */ 3300 if (subset_mult) { 3301 const PetscInt* idxs_mult; 3302 PetscInt cum; 3303 3304 cum = 0; 3305 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3306 for (i=0;i<n;i++) { 3307 PetscInt j; 3308 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3309 } 3310 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3311 } else { 3312 for (i=0;i<n;i++) { 3313 gidxs[i] = leaf_data[i]-1; 3314 } 3315 } 3316 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3317 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3318 PetscFunctionReturn(0); 3319 } 3320 3321 #undef __FUNCT__ 3322 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3323 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3324 { 3325 PetscInt i,j; 3326 PetscScalar *alphas; 3327 PetscErrorCode ierr; 3328 3329 PetscFunctionBegin; 3330 /* this implements stabilized Gram-Schmidt */ 3331 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3332 for (i=0;i<n;i++) { 3333 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3334 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3335 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3336 } 3337 ierr = PetscFree(alphas);CHKERRQ(ierr); 3338 PetscFunctionReturn(0); 3339 } 3340 3341 #undef __FUNCT__ 3342 #define __FUNCT__ "MatISGetSubassemblingPattern" 3343 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3344 { 3345 IS ranks_send_to; 3346 PetscInt n_neighs,*neighs,*n_shared,**shared; 3347 PetscMPIInt size,rank,color; 3348 PetscInt *xadj,*adjncy; 3349 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3350 PetscInt i,local_size,threshold=0; 3351 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3352 PetscSubcomm subcomm; 3353 PetscErrorCode ierr; 3354 3355 PetscFunctionBegin; 3356 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3357 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3358 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3359 3360 /* Get info on mapping */ 3361 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3362 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3363 3364 /* build local CSR graph of subdomains' connectivity */ 3365 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3366 xadj[0] = 0; 3367 xadj[1] = PetscMax(n_neighs-1,0); 3368 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3369 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3370 3371 if (threshold) { 3372 PetscInt xadj_count = 0; 3373 for (i=1;i<n_neighs;i++) { 3374 if (n_shared[i] > threshold) { 3375 adjncy[xadj_count] = neighs[i]; 3376 adjncy_wgt[xadj_count] = n_shared[i]; 3377 xadj_count++; 3378 } 3379 } 3380 xadj[1] = xadj_count; 3381 } else { 3382 if (xadj[1]) { 3383 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3384 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3385 } 3386 } 3387 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3388 if (use_square) { 3389 for (i=0;i<xadj[1];i++) { 3390 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3391 } 3392 } 3393 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3394 3395 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3396 3397 /* 3398 Restrict work on active processes only. 3399 */ 3400 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3401 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3402 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3403 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3404 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3405 if (color) { 3406 ierr = PetscFree(xadj);CHKERRQ(ierr); 3407 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3408 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3409 } else { 3410 Mat subdomain_adj; 3411 IS new_ranks,new_ranks_contig; 3412 MatPartitioning partitioner; 3413 PetscInt prank,rstart=0,rend=0; 3414 PetscInt *is_indices,*oldranks; 3415 PetscBool aggregate; 3416 3417 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3418 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3419 prank = rank; 3420 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3421 /* 3422 for (i=0;i<size;i++) { 3423 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3424 } 3425 */ 3426 for (i=0;i<xadj[1];i++) { 3427 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3428 } 3429 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3430 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3431 if (aggregate) { 3432 PetscInt lrows,row,ncols,*cols; 3433 PetscMPIInt nrank; 3434 PetscScalar *vals; 3435 3436 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3437 lrows = 0; 3438 if (nrank<redprocs) { 3439 lrows = size/redprocs; 3440 if (nrank<size%redprocs) lrows++; 3441 } 3442 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3443 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3444 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3445 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3446 row = nrank; 3447 ncols = xadj[1]-xadj[0]; 3448 cols = adjncy; 3449 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3450 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3451 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3452 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3453 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3454 ierr = PetscFree(xadj);CHKERRQ(ierr); 3455 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3456 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3457 ierr = PetscFree(vals);CHKERRQ(ierr); 3458 } else { 3459 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3460 } 3461 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3462 3463 /* Partition */ 3464 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3465 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3466 if (use_vwgt) { 3467 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3468 v_wgt[0] = local_size; 3469 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3470 } 3471 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3472 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3473 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3474 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3475 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3476 3477 /* renumber new_ranks to avoid "holes" in new set of processors */ 3478 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3479 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3480 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3481 if (!redprocs) { 3482 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3483 } else { 3484 PetscInt idxs[1]; 3485 PetscMPIInt tag; 3486 MPI_Request *reqs; 3487 3488 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3489 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3490 for (i=rstart;i<rend;i++) { 3491 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3492 } 3493 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3494 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3495 ierr = PetscFree(reqs);CHKERRQ(ierr); 3496 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3497 } 3498 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3499 /* clean up */ 3500 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3501 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3502 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3503 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3504 } 3505 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3506 3507 /* assemble parallel IS for sends */ 3508 i = 1; 3509 if (color) i=0; 3510 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3511 /* get back IS */ 3512 *is_sends = ranks_send_to; 3513 PetscFunctionReturn(0); 3514 } 3515 3516 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3517 3518 #undef __FUNCT__ 3519 #define __FUNCT__ "MatISSubassemble" 3520 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3521 { 3522 Mat local_mat; 3523 IS is_sends_internal; 3524 PetscInt rows,cols,new_local_rows; 3525 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3526 PetscBool ismatis,isdense,newisdense,destroy_mat; 3527 ISLocalToGlobalMapping l2gmap; 3528 PetscInt* l2gmap_indices; 3529 const PetscInt* is_indices; 3530 MatType new_local_type; 3531 /* buffers */ 3532 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3533 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3534 PetscInt *recv_buffer_idxs_local; 3535 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3536 /* MPI */ 3537 MPI_Comm comm,comm_n; 3538 PetscSubcomm subcomm; 3539 PetscMPIInt n_sends,n_recvs,commsize; 3540 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3541 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3542 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3543 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3544 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3545 PetscErrorCode ierr; 3546 3547 PetscFunctionBegin; 3548 /* TODO: add missing checks */ 3549 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3550 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3551 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3552 PetscValidLogicalCollectiveInt(mat,nis,7); 3553 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3554 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3555 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3556 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3557 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3558 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3559 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3560 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3561 PetscInt mrows,mcols,mnrows,mncols; 3562 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3563 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3564 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3565 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3566 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3567 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3568 } 3569 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3570 PetscValidLogicalCollectiveInt(mat,bs,0); 3571 /* prepare IS for sending if not provided */ 3572 if (!is_sends) { 3573 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3574 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3575 } else { 3576 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3577 is_sends_internal = is_sends; 3578 } 3579 3580 /* get comm */ 3581 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3582 3583 /* compute number of sends */ 3584 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3585 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3586 3587 /* compute number of receives */ 3588 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3589 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3590 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3591 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3592 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3593 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3594 ierr = PetscFree(iflags);CHKERRQ(ierr); 3595 3596 /* restrict comm if requested */ 3597 subcomm = 0; 3598 destroy_mat = PETSC_FALSE; 3599 if (restrict_comm) { 3600 PetscMPIInt color,subcommsize; 3601 3602 color = 0; 3603 if (restrict_full) { 3604 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3605 } else { 3606 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3607 } 3608 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3609 subcommsize = commsize - subcommsize; 3610 /* check if reuse has been requested */ 3611 if (reuse == MAT_REUSE_MATRIX) { 3612 if (*mat_n) { 3613 PetscMPIInt subcommsize2; 3614 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3615 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3616 comm_n = PetscObjectComm((PetscObject)*mat_n); 3617 } else { 3618 comm_n = PETSC_COMM_SELF; 3619 } 3620 } else { /* MAT_INITIAL_MATRIX */ 3621 PetscMPIInt rank; 3622 3623 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3624 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3625 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3626 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3627 comm_n = PetscSubcommChild(subcomm); 3628 } 3629 /* flag to destroy *mat_n if not significative */ 3630 if (color) destroy_mat = PETSC_TRUE; 3631 } else { 3632 comm_n = comm; 3633 } 3634 3635 /* prepare send/receive buffers */ 3636 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3637 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3638 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3639 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3640 if (nis) { 3641 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3642 } 3643 3644 /* Get data from local matrices */ 3645 if (!isdense) { 3646 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3647 /* TODO: See below some guidelines on how to prepare the local buffers */ 3648 /* 3649 send_buffer_vals should contain the raw values of the local matrix 3650 send_buffer_idxs should contain: 3651 - MatType_PRIVATE type 3652 - PetscInt size_of_l2gmap 3653 - PetscInt global_row_indices[size_of_l2gmap] 3654 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3655 */ 3656 } else { 3657 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3658 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3659 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3660 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3661 send_buffer_idxs[1] = i; 3662 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3663 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3664 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3665 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3666 for (i=0;i<n_sends;i++) { 3667 ilengths_vals[is_indices[i]] = len*len; 3668 ilengths_idxs[is_indices[i]] = len+2; 3669 } 3670 } 3671 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3672 /* additional is (if any) */ 3673 if (nis) { 3674 PetscMPIInt psum; 3675 PetscInt j; 3676 for (j=0,psum=0;j<nis;j++) { 3677 PetscInt plen; 3678 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3679 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3680 psum += len+1; /* indices + lenght */ 3681 } 3682 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3683 for (j=0,psum=0;j<nis;j++) { 3684 PetscInt plen; 3685 const PetscInt *is_array_idxs; 3686 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3687 send_buffer_idxs_is[psum] = plen; 3688 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3689 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3690 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3691 psum += plen+1; /* indices + lenght */ 3692 } 3693 for (i=0;i<n_sends;i++) { 3694 ilengths_idxs_is[is_indices[i]] = psum; 3695 } 3696 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3697 } 3698 3699 buf_size_idxs = 0; 3700 buf_size_vals = 0; 3701 buf_size_idxs_is = 0; 3702 for (i=0;i<n_recvs;i++) { 3703 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3704 buf_size_vals += (PetscInt)olengths_vals[i]; 3705 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3706 } 3707 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3708 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3709 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3710 3711 /* get new tags for clean communications */ 3712 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3713 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3714 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3715 3716 /* allocate for requests */ 3717 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3718 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3719 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3720 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3721 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3722 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3723 3724 /* communications */ 3725 ptr_idxs = recv_buffer_idxs; 3726 ptr_vals = recv_buffer_vals; 3727 ptr_idxs_is = recv_buffer_idxs_is; 3728 for (i=0;i<n_recvs;i++) { 3729 source_dest = onodes[i]; 3730 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3731 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3732 ptr_idxs += olengths_idxs[i]; 3733 ptr_vals += olengths_vals[i]; 3734 if (nis) { 3735 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3736 ptr_idxs_is += olengths_idxs_is[i]; 3737 } 3738 } 3739 for (i=0;i<n_sends;i++) { 3740 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3741 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3742 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3743 if (nis) { 3744 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3745 } 3746 } 3747 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3748 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3749 3750 /* assemble new l2g map */ 3751 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3752 ptr_idxs = recv_buffer_idxs; 3753 new_local_rows = 0; 3754 for (i=0;i<n_recvs;i++) { 3755 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3756 ptr_idxs += olengths_idxs[i]; 3757 } 3758 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3759 ptr_idxs = recv_buffer_idxs; 3760 new_local_rows = 0; 3761 for (i=0;i<n_recvs;i++) { 3762 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3763 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3764 ptr_idxs += olengths_idxs[i]; 3765 } 3766 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3767 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3768 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3769 3770 /* infer new local matrix type from received local matrices type */ 3771 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3772 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3773 if (n_recvs) { 3774 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3775 ptr_idxs = recv_buffer_idxs; 3776 for (i=0;i<n_recvs;i++) { 3777 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3778 new_local_type_private = MATAIJ_PRIVATE; 3779 break; 3780 } 3781 ptr_idxs += olengths_idxs[i]; 3782 } 3783 switch (new_local_type_private) { 3784 case MATDENSE_PRIVATE: 3785 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3786 new_local_type = MATSEQAIJ; 3787 bs = 1; 3788 } else { /* if I receive only 1 dense matrix */ 3789 new_local_type = MATSEQDENSE; 3790 bs = 1; 3791 } 3792 break; 3793 case MATAIJ_PRIVATE: 3794 new_local_type = MATSEQAIJ; 3795 bs = 1; 3796 break; 3797 case MATBAIJ_PRIVATE: 3798 new_local_type = MATSEQBAIJ; 3799 break; 3800 case MATSBAIJ_PRIVATE: 3801 new_local_type = MATSEQSBAIJ; 3802 break; 3803 default: 3804 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3805 break; 3806 } 3807 } else { /* by default, new_local_type is seqdense */ 3808 new_local_type = MATSEQDENSE; 3809 bs = 1; 3810 } 3811 3812 /* create MATIS object if needed */ 3813 if (reuse == MAT_INITIAL_MATRIX) { 3814 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3815 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3816 } else { 3817 /* it also destroys the local matrices */ 3818 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3819 } 3820 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3821 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3822 3823 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3824 3825 /* Global to local map of received indices */ 3826 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3827 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3828 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3829 3830 /* restore attributes -> type of incoming data and its size */ 3831 buf_size_idxs = 0; 3832 for (i=0;i<n_recvs;i++) { 3833 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3834 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3835 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3836 } 3837 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3838 3839 /* set preallocation */ 3840 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3841 if (!newisdense) { 3842 PetscInt *new_local_nnz=0; 3843 3844 ptr_vals = recv_buffer_vals; 3845 ptr_idxs = recv_buffer_idxs_local; 3846 if (n_recvs) { 3847 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3848 } 3849 for (i=0;i<n_recvs;i++) { 3850 PetscInt j; 3851 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3852 for (j=0;j<*(ptr_idxs+1);j++) { 3853 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3854 } 3855 } else { 3856 /* TODO */ 3857 } 3858 ptr_idxs += olengths_idxs[i]; 3859 } 3860 if (new_local_nnz) { 3861 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3862 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3863 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3864 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3865 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3866 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3867 } else { 3868 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3869 } 3870 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3871 } else { 3872 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3873 } 3874 3875 /* set values */ 3876 ptr_vals = recv_buffer_vals; 3877 ptr_idxs = recv_buffer_idxs_local; 3878 for (i=0;i<n_recvs;i++) { 3879 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3880 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3881 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3882 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3883 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3884 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3885 } else { 3886 /* TODO */ 3887 } 3888 ptr_idxs += olengths_idxs[i]; 3889 ptr_vals += olengths_vals[i]; 3890 } 3891 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3894 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3895 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3896 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3897 3898 #if 0 3899 if (!restrict_comm) { /* check */ 3900 Vec lvec,rvec; 3901 PetscReal infty_error; 3902 3903 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3904 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3905 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3906 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3907 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3908 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3909 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3910 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3911 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3912 } 3913 #endif 3914 3915 /* assemble new additional is (if any) */ 3916 if (nis) { 3917 PetscInt **temp_idxs,*count_is,j,psum; 3918 3919 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3920 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3921 ptr_idxs = recv_buffer_idxs_is; 3922 psum = 0; 3923 for (i=0;i<n_recvs;i++) { 3924 for (j=0;j<nis;j++) { 3925 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3926 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3927 psum += plen; 3928 ptr_idxs += plen+1; /* shift pointer to received data */ 3929 } 3930 } 3931 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3932 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3933 for (i=1;i<nis;i++) { 3934 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3935 } 3936 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3937 ptr_idxs = recv_buffer_idxs_is; 3938 for (i=0;i<n_recvs;i++) { 3939 for (j=0;j<nis;j++) { 3940 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3941 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3942 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3943 ptr_idxs += plen+1; /* shift pointer to received data */ 3944 } 3945 } 3946 for (i=0;i<nis;i++) { 3947 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3948 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3949 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3950 } 3951 ierr = PetscFree(count_is);CHKERRQ(ierr); 3952 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3953 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3954 } 3955 /* free workspace */ 3956 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3957 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3958 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3959 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3960 if (isdense) { 3961 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3962 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3963 } else { 3964 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3965 } 3966 if (nis) { 3967 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3968 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3969 } 3970 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3971 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3972 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3973 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3974 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3975 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3976 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3977 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3978 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3979 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3980 ierr = PetscFree(onodes);CHKERRQ(ierr); 3981 if (nis) { 3982 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3983 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3984 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3985 } 3986 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3987 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3988 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3989 for (i=0;i<nis;i++) { 3990 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3991 } 3992 *mat_n = NULL; 3993 } 3994 PetscFunctionReturn(0); 3995 } 3996 3997 /* temporary hack into ksp private data structure */ 3998 #include <petsc/private/kspimpl.h> 3999 4000 #undef __FUNCT__ 4001 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4002 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4003 { 4004 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4005 PC_IS *pcis = (PC_IS*)pc->data; 4006 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4007 MatNullSpace CoarseNullSpace=NULL; 4008 ISLocalToGlobalMapping coarse_islg; 4009 IS coarse_is,*isarray; 4010 PetscInt i,im_active=-1,active_procs=-1; 4011 PetscInt nis,nisdofs,nisneu; 4012 PC pc_temp; 4013 PCType coarse_pc_type; 4014 KSPType coarse_ksp_type; 4015 PetscBool multilevel_requested,multilevel_allowed; 4016 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4017 Mat t_coarse_mat_is; 4018 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4019 PetscMPIInt all_procs; 4020 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4021 PetscBool compute_vecs = PETSC_FALSE; 4022 PetscScalar *array; 4023 PetscErrorCode ierr; 4024 4025 PetscFunctionBegin; 4026 /* Assign global numbering to coarse dofs */ 4027 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4028 PetscInt ocoarse_size; 4029 compute_vecs = PETSC_TRUE; 4030 ocoarse_size = pcbddc->coarse_size; 4031 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4032 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4033 /* see if we can avoid some work */ 4034 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4035 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4036 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4037 PC pc; 4038 PetscBool isbddc; 4039 4040 /* temporary workaround since PCBDDC does not have a reset method so far */ 4041 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4042 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4043 if (isbddc) { 4044 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4045 } 4046 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4047 coarse_reuse = PETSC_FALSE; 4048 } else { /* we can safely reuse already computed coarse matrix */ 4049 coarse_reuse = PETSC_TRUE; 4050 } 4051 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4052 coarse_reuse = PETSC_FALSE; 4053 } 4054 /* reset any subassembling information */ 4055 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4056 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4057 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4058 coarse_reuse = PETSC_TRUE; 4059 } 4060 4061 /* count "active" (i.e. with positive local size) and "void" processes */ 4062 im_active = !!(pcis->n); 4063 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4064 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4065 void_procs = all_procs-active_procs; 4066 csin_type_simple = PETSC_TRUE; 4067 redist = PETSC_FALSE; 4068 if (pcbddc->current_level && void_procs) { 4069 csin_ml = PETSC_TRUE; 4070 ncoarse_ml = void_procs; 4071 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4072 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4073 csin_ds = PETSC_TRUE; 4074 ncoarse_ds = pcbddc->redistribute_coarse; 4075 redist = PETSC_TRUE; 4076 } else { 4077 csin_ds = PETSC_TRUE; 4078 ncoarse_ds = active_procs; 4079 redist = PETSC_TRUE; 4080 } 4081 } else { 4082 csin_ml = PETSC_FALSE; 4083 ncoarse_ml = all_procs; 4084 if (void_procs) { 4085 csin_ds = PETSC_TRUE; 4086 ncoarse_ds = void_procs; 4087 csin_type_simple = PETSC_FALSE; 4088 } else { 4089 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4090 csin_ds = PETSC_TRUE; 4091 ncoarse_ds = pcbddc->redistribute_coarse; 4092 redist = PETSC_TRUE; 4093 } else { 4094 csin_ds = PETSC_FALSE; 4095 ncoarse_ds = all_procs; 4096 } 4097 } 4098 } 4099 4100 /* 4101 test if we can go multilevel: three conditions must be satisfied: 4102 - we have not exceeded the number of levels requested 4103 - we can actually subassemble the active processes 4104 - we can find a suitable number of MPI processes where we can place the subassembled problem 4105 */ 4106 multilevel_allowed = PETSC_FALSE; 4107 multilevel_requested = PETSC_FALSE; 4108 if (pcbddc->current_level < pcbddc->max_levels) { 4109 multilevel_requested = PETSC_TRUE; 4110 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4111 multilevel_allowed = PETSC_FALSE; 4112 } else { 4113 multilevel_allowed = PETSC_TRUE; 4114 } 4115 } 4116 /* determine number of process partecipating to coarse solver */ 4117 if (multilevel_allowed) { 4118 ncoarse = ncoarse_ml; 4119 csin = csin_ml; 4120 redist = PETSC_FALSE; 4121 } else { 4122 ncoarse = ncoarse_ds; 4123 csin = csin_ds; 4124 } 4125 4126 /* creates temporary l2gmap and IS for coarse indexes */ 4127 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4128 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4129 4130 /* creates temporary MATIS object for coarse matrix */ 4131 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4132 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4133 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4134 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4135 #if 0 4136 { 4137 PetscViewer viewer; 4138 char filename[256]; 4139 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4140 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4141 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4142 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4143 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4144 } 4145 #endif 4146 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4147 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4148 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4150 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4151 4152 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4153 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4154 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4155 const PetscInt *idxs; 4156 ISLocalToGlobalMapping tmap; 4157 4158 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4159 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4160 /* allocate space for temporary storage */ 4161 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4162 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4163 /* allocate for IS array */ 4164 nisdofs = pcbddc->n_ISForDofsLocal; 4165 nisneu = !!pcbddc->NeumannBoundariesLocal; 4166 nis = nisdofs + nisneu; 4167 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4168 /* dofs splitting */ 4169 for (i=0;i<nisdofs;i++) { 4170 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4171 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4172 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4173 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4174 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4175 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4176 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4177 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4178 } 4179 /* neumann boundaries */ 4180 if (pcbddc->NeumannBoundariesLocal) { 4181 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4182 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4183 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4184 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4185 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4186 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4187 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4188 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4189 } 4190 /* free memory */ 4191 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4192 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4193 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4194 } else { 4195 nis = 0; 4196 nisdofs = 0; 4197 nisneu = 0; 4198 isarray = NULL; 4199 } 4200 /* destroy no longer needed map */ 4201 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4202 4203 /* restrict on coarse candidates (if needed) */ 4204 coarse_mat_is = NULL; 4205 if (csin) { 4206 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4207 if (redist) { 4208 PetscMPIInt rank; 4209 PetscInt spc,n_spc_p1,dest[1],destsize; 4210 4211 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4212 spc = active_procs/ncoarse; 4213 n_spc_p1 = active_procs%ncoarse; 4214 if (im_active) { 4215 destsize = 1; 4216 if (rank > n_spc_p1*(spc+1)-1) { 4217 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4218 } else { 4219 dest[0] = rank/(spc+1); 4220 } 4221 } else { 4222 destsize = 0; 4223 } 4224 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4225 } else if (csin_type_simple) { 4226 PetscMPIInt rank; 4227 PetscInt issize,isidx; 4228 4229 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4230 if (im_active) { 4231 issize = 1; 4232 isidx = (PetscInt)rank; 4233 } else { 4234 issize = 0; 4235 isidx = -1; 4236 } 4237 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4238 } else { /* get a suitable subassembling pattern from MATIS code */ 4239 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4240 } 4241 4242 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4243 if (!redist || ncoarse <= void_procs) { 4244 PetscInt ncoarse_cand,tissize,*nisindices; 4245 PetscInt *coarse_candidates; 4246 const PetscInt* tisindices; 4247 4248 /* get coarse candidates' ranks in pc communicator */ 4249 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4250 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4251 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4252 if (!coarse_candidates[i]) { 4253 coarse_candidates[ncoarse_cand++]=i; 4254 } 4255 } 4256 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4257 4258 4259 if (pcbddc->dbg_flag) { 4260 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4261 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4262 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4264 for (i=0;i<ncoarse_cand;i++) { 4265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4266 } 4267 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4268 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4269 } 4270 /* shift the pattern on coarse candidates */ 4271 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4272 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4273 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4274 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4275 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4276 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4277 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4278 } 4279 if (pcbddc->dbg_flag) { 4280 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4281 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4282 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4283 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4284 } 4285 } 4286 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4287 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4288 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4289 } else { /* this is the last level, so use just receiving processes in subcomm */ 4290 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4291 } 4292 } else { 4293 if (pcbddc->dbg_flag) { 4294 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4295 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4296 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4297 } 4298 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4299 coarse_mat_is = t_coarse_mat_is; 4300 } 4301 4302 /* create local to global scatters for coarse problem */ 4303 if (compute_vecs) { 4304 PetscInt lrows; 4305 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4306 if (coarse_mat_is) { 4307 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4308 } else { 4309 lrows = 0; 4310 } 4311 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4312 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4313 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4314 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4315 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4316 } 4317 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4318 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4319 4320 /* set defaults for coarse KSP and PC */ 4321 if (multilevel_allowed) { 4322 coarse_ksp_type = KSPRICHARDSON; 4323 coarse_pc_type = PCBDDC; 4324 } else { 4325 coarse_ksp_type = KSPPREONLY; 4326 coarse_pc_type = PCREDUNDANT; 4327 } 4328 4329 /* print some info if requested */ 4330 if (pcbddc->dbg_flag) { 4331 if (!multilevel_allowed) { 4332 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4333 if (multilevel_requested) { 4334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4335 } else if (pcbddc->max_levels) { 4336 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4337 } 4338 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4339 } 4340 } 4341 4342 /* create the coarse KSP object only once with defaults */ 4343 if (coarse_mat_is) { 4344 MatReuse coarse_mat_reuse; 4345 PetscViewer dbg_viewer = NULL; 4346 if (pcbddc->dbg_flag) { 4347 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4348 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4349 } 4350 if (!pcbddc->coarse_ksp) { 4351 char prefix[256],str_level[16]; 4352 size_t len; 4353 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4354 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4355 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4356 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4357 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4358 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4359 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4360 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4361 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4362 /* prefix */ 4363 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4364 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4365 if (!pcbddc->current_level) { 4366 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4367 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4368 } else { 4369 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4370 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4371 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4372 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4373 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4374 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4375 } 4376 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4377 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4378 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4379 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4380 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4381 /* allow user customization */ 4382 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4383 } 4384 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4385 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4386 if (nisdofs) { 4387 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4388 for (i=0;i<nisdofs;i++) { 4389 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4390 } 4391 } 4392 if (nisneu) { 4393 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4394 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4395 } 4396 4397 /* get some info after set from options */ 4398 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4399 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4400 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4401 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4402 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4403 isbddc = PETSC_FALSE; 4404 } 4405 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4406 if (isredundant) { 4407 KSP inner_ksp; 4408 PC inner_pc; 4409 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4410 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4411 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4412 } 4413 4414 /* assemble coarse matrix */ 4415 if (coarse_reuse) { 4416 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4417 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4418 coarse_mat_reuse = MAT_REUSE_MATRIX; 4419 } else { 4420 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4421 } 4422 if (isbddc || isnn) { 4423 if (pcbddc->coarsening_ratio > 1) { 4424 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4425 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4426 if (pcbddc->dbg_flag) { 4427 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4428 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4429 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4430 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4431 } 4432 } 4433 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4434 } else { 4435 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4436 coarse_mat = coarse_mat_is; 4437 } 4438 } else { 4439 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4440 } 4441 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4442 4443 /* propagate symmetry info of coarse matrix */ 4444 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4445 if (pc->pmat->symmetric_set) { 4446 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4447 } 4448 if (pc->pmat->hermitian_set) { 4449 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4450 } 4451 if (pc->pmat->spd_set) { 4452 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4453 } 4454 /* set operators */ 4455 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4456 if (pcbddc->dbg_flag) { 4457 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4458 } 4459 } else { /* processes non partecipating to coarse solver (if any) */ 4460 coarse_mat = 0; 4461 } 4462 ierr = PetscFree(isarray);CHKERRQ(ierr); 4463 #if 0 4464 { 4465 PetscViewer viewer; 4466 char filename[256]; 4467 sprintf(filename,"coarse_mat.m"); 4468 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4469 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4470 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4471 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4472 } 4473 #endif 4474 4475 /* Compute coarse null space (special handling by BDDC only) */ 4476 #if 0 4477 if (pcbddc->NullSpace) { 4478 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4479 } 4480 #endif 4481 4482 if (pcbddc->coarse_ksp) { 4483 Vec crhs,csol; 4484 PetscBool ispreonly; 4485 4486 if (CoarseNullSpace) { 4487 if (isbddc) { 4488 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4489 } else { 4490 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4491 } 4492 } 4493 /* setup coarse ksp */ 4494 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4495 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4496 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4497 /* hack */ 4498 if (!csol) { 4499 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4500 } 4501 if (!crhs) { 4502 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4503 } 4504 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4505 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4506 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4507 KSP check_ksp; 4508 KSPType check_ksp_type; 4509 PC check_pc; 4510 Vec check_vec,coarse_vec; 4511 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4512 PetscInt its; 4513 PetscBool compute_eigs; 4514 PetscReal *eigs_r,*eigs_c; 4515 PetscInt neigs; 4516 const char *prefix; 4517 4518 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4519 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4520 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4521 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4522 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4523 if (ispreonly) { 4524 check_ksp_type = KSPPREONLY; 4525 compute_eigs = PETSC_FALSE; 4526 } else { 4527 check_ksp_type = KSPGMRES; 4528 compute_eigs = PETSC_TRUE; 4529 } 4530 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4531 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4532 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4533 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4534 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4535 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4536 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4537 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4538 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4539 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4540 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4541 /* create random vec */ 4542 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4543 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4544 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4545 if (CoarseNullSpace) { 4546 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4547 } 4548 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4549 /* solve coarse problem */ 4550 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4551 if (CoarseNullSpace) { 4552 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4553 } 4554 /* set eigenvalue estimation if preonly has not been requested */ 4555 if (compute_eigs) { 4556 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4557 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4558 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4559 lambda_max = eigs_r[neigs-1]; 4560 lambda_min = eigs_r[0]; 4561 if (pcbddc->use_coarse_estimates) { 4562 if (lambda_max>lambda_min) { 4563 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4564 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4565 } 4566 } 4567 } 4568 4569 /* check coarse problem residual error */ 4570 if (pcbddc->dbg_flag) { 4571 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4572 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4573 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4574 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4575 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4576 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4577 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4578 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4579 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4580 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4581 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4582 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4583 if (compute_eigs) { 4584 PetscReal lambda_max_s,lambda_min_s; 4585 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4586 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4587 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4588 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4589 for (i=0;i<neigs;i++) { 4590 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4591 } 4592 } 4593 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4594 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4595 } 4596 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4597 if (compute_eigs) { 4598 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4599 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4600 } 4601 } 4602 } 4603 /* print additional info */ 4604 if (pcbddc->dbg_flag) { 4605 /* waits until all processes reaches this point */ 4606 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4607 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4608 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4609 } 4610 4611 /* free memory */ 4612 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4613 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4614 PetscFunctionReturn(0); 4615 } 4616 4617 #undef __FUNCT__ 4618 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4619 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4620 { 4621 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4622 PC_IS* pcis = (PC_IS*)pc->data; 4623 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4624 IS subset,subset_mult,subset_n; 4625 PetscInt local_size,coarse_size=0; 4626 PetscInt *local_primal_indices=NULL; 4627 const PetscInt *t_local_primal_indices; 4628 PetscErrorCode ierr; 4629 4630 PetscFunctionBegin; 4631 /* Compute global number of coarse dofs */ 4632 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4633 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4634 } 4635 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4636 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4637 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4638 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4639 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4640 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4641 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4642 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4643 if (local_size != pcbddc->local_primal_size) { 4644 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4645 } 4646 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4647 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4648 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4649 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4650 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4651 4652 /* check numbering */ 4653 if (pcbddc->dbg_flag) { 4654 PetscScalar coarsesum,*array; 4655 PetscInt i; 4656 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4657 4658 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4660 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4661 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4662 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4663 for (i=0;i<pcbddc->local_primal_size;i++) { 4664 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4665 } 4666 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4667 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4668 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4669 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4670 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4671 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4672 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4673 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4674 for (i=0;i<pcis->n;i++) { 4675 if (array[i] == 1.0) { 4676 set_error = PETSC_TRUE; 4677 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4678 } 4679 } 4680 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4681 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4682 for (i=0;i<pcis->n;i++) { 4683 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4684 } 4685 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4686 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4687 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4688 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4689 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4690 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4691 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4692 PetscInt *gidxs; 4693 4694 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4695 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4696 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4697 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4699 for (i=0;i<pcbddc->local_primal_size;i++) { 4700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4701 } 4702 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4703 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4704 } 4705 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4706 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4707 } 4708 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4709 /* get back data */ 4710 *coarse_size_n = coarse_size; 4711 *local_primal_indices_n = local_primal_indices; 4712 PetscFunctionReturn(0); 4713 } 4714 4715 #undef __FUNCT__ 4716 #define __FUNCT__ "PCBDDCGlobalToLocal" 4717 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4718 { 4719 IS localis_t; 4720 PetscInt i,lsize,*idxs,n; 4721 PetscScalar *vals; 4722 PetscErrorCode ierr; 4723 4724 PetscFunctionBegin; 4725 /* get indices in local ordering exploiting local to global map */ 4726 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4727 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4728 for (i=0;i<lsize;i++) vals[i] = 1.0; 4729 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4730 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4731 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4732 if (idxs) { /* multilevel guard */ 4733 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4734 } 4735 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4736 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4737 ierr = PetscFree(vals);CHKERRQ(ierr); 4738 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4739 /* now compute set in local ordering */ 4740 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4741 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4742 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4743 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4744 for (i=0,lsize=0;i<n;i++) { 4745 if (PetscRealPart(vals[i]) > 0.5) { 4746 lsize++; 4747 } 4748 } 4749 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4750 for (i=0,lsize=0;i<n;i++) { 4751 if (PetscRealPart(vals[i]) > 0.5) { 4752 idxs[lsize++] = i; 4753 } 4754 } 4755 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4756 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4757 *localis = localis_t; 4758 PetscFunctionReturn(0); 4759 } 4760 4761 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4762 #undef __FUNCT__ 4763 #define __FUNCT__ "PCBDDCMatMult_Private" 4764 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4765 { 4766 PCBDDCChange_ctx change_ctx; 4767 PetscErrorCode ierr; 4768 4769 PetscFunctionBegin; 4770 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4771 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4772 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4773 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4774 PetscFunctionReturn(0); 4775 } 4776 4777 #undef __FUNCT__ 4778 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4779 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4780 { 4781 PCBDDCChange_ctx change_ctx; 4782 PetscErrorCode ierr; 4783 4784 PetscFunctionBegin; 4785 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4786 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4787 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4788 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4789 PetscFunctionReturn(0); 4790 } 4791 4792 #undef __FUNCT__ 4793 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4794 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4795 { 4796 PC_IS *pcis=(PC_IS*)pc->data; 4797 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4798 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4799 Mat S_j; 4800 PetscInt *used_xadj,*used_adjncy; 4801 PetscBool free_used_adj; 4802 PetscErrorCode ierr; 4803 4804 PetscFunctionBegin; 4805 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4806 free_used_adj = PETSC_FALSE; 4807 if (pcbddc->sub_schurs_layers == -1) { 4808 used_xadj = NULL; 4809 used_adjncy = NULL; 4810 } else { 4811 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4812 used_xadj = pcbddc->mat_graph->xadj; 4813 used_adjncy = pcbddc->mat_graph->adjncy; 4814 } else if (pcbddc->computed_rowadj) { 4815 used_xadj = pcbddc->mat_graph->xadj; 4816 used_adjncy = pcbddc->mat_graph->adjncy; 4817 } else { 4818 PetscBool flg_row=PETSC_FALSE; 4819 const PetscInt *xadj,*adjncy; 4820 PetscInt nvtxs; 4821 4822 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4823 if (flg_row) { 4824 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4825 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4826 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4827 free_used_adj = PETSC_TRUE; 4828 } else { 4829 pcbddc->sub_schurs_layers = -1; 4830 used_xadj = NULL; 4831 used_adjncy = NULL; 4832 } 4833 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4834 } 4835 } 4836 4837 /* setup sub_schurs data */ 4838 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4839 if (!sub_schurs->use_mumps) { 4840 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4841 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4842 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4843 } else { 4844 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4845 PetscBool isseqaij; 4846 if (!pcbddc->use_vertices && reuse_solvers) { 4847 PetscInt n_vertices; 4848 4849 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4850 reuse_solvers = (PetscBool)!n_vertices; 4851 } 4852 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4853 if (!isseqaij) { 4854 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4855 if (matis->A == pcbddc->local_mat) { 4856 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4857 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4858 } else { 4859 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4860 } 4861 } 4862 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4863 } 4864 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4865 4866 /* free adjacency */ 4867 if (free_used_adj) { 4868 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4869 } 4870 PetscFunctionReturn(0); 4871 } 4872 4873 #undef __FUNCT__ 4874 #define __FUNCT__ "PCBDDCInitSubSchurs" 4875 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4876 { 4877 PC_IS *pcis=(PC_IS*)pc->data; 4878 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4879 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4880 PCBDDCGraph graph; 4881 PetscErrorCode ierr; 4882 4883 PetscFunctionBegin; 4884 /* attach interface graph for determining subsets */ 4885 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4886 IS verticesIS,verticescomm; 4887 PetscInt vsize,*idxs; 4888 4889 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4890 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4891 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4892 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4893 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4894 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4895 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4896 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4897 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4898 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4899 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4900 /* 4901 if (pcbddc->dbg_flag) { 4902 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4903 } 4904 */ 4905 } else { 4906 graph = pcbddc->mat_graph; 4907 } 4908 4909 /* sub_schurs init */ 4910 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4911 4912 /* free graph struct */ 4913 if (pcbddc->sub_schurs_rebuild) { 4914 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4915 } 4916 PetscFunctionReturn(0); 4917 } 4918