1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 thresh = 1.0; 92 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 93 #if defined(PETSC_USE_COMPLEX) 94 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 95 #else 96 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 97 #endif 98 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 99 ierr = PetscFPTrapPop();CHKERRQ(ierr); 100 } else { 101 /* TODO */ 102 } 103 } else { 104 lwork = 0; 105 } 106 107 nv = 0; 108 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 109 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 110 } 111 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 112 if (allocated_S_St) { 113 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 114 } 115 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 116 #if defined(PETSC_USE_COMPLEX) 117 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 118 #endif 119 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 121 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 122 nv+cum,&pcbddc->adaptive_constraints_idxs, 123 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 124 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 125 126 maxneigs = 0; 127 cum = cum2 = cumarray = 0; 128 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 129 pcbddc->adaptive_constraints_data_ptr[0] = 0; 130 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 131 const PetscInt *idxs; 132 133 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 134 for (cum=0;cum<nv;cum++) { 135 pcbddc->adaptive_constraints_n[cum] = 1; 136 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 137 pcbddc->adaptive_constraints_data[cum] = 1.0; 138 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 139 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 140 } 141 cum2 = cum; 142 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 143 } 144 145 if (mss) { /* multilevel */ 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 147 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 148 } 149 150 for (i=0;i<sub_schurs->n_subs;i++) { 151 152 const PetscInt *idxs; 153 PetscReal infty = PETSC_MAX_REAL; 154 PetscInt j,subset_size,eigs_start = 0; 155 PetscBLASInt B_N; 156 PetscBool same_data = PETSC_FALSE; 157 158 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 159 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 160 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 161 if (sub_schurs->is_hermitian) { 162 PetscInt j,k; 163 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 164 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 for (j=0;j<subset_size;j++) { 168 for (k=j;k<subset_size;k++) { 169 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 170 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 171 } 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 183 /* see if we can save some work */ 184 if (sub_schurs->n_subs == 1) { 185 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 186 } 187 188 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 189 B_neigs = 0; 190 } else { 191 /* Threshold: this is an heuristic for edges */ 192 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 193 194 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 195 PetscBLASInt B_itype = 1; 196 PetscBLASInt B_IL, B_IU; 197 PetscReal eps = -1.0; /* dlamch? */ 198 PetscInt nmin_s; 199 200 /* ask for eigenvalues larger than thresh */ 201 if (pcbddc->dbg_flag) { 202 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 203 } 204 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 205 #if defined(PETSC_USE_COMPLEX) 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 207 #else 208 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 209 #endif 210 ierr = PetscFPTrapPop();CHKERRQ(ierr); 211 if (B_ierr) { 212 if (B_ierr < 0 ) { 213 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 214 } else if (B_ierr <= B_N) { 215 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 216 } else { 217 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 218 } 219 } 220 221 if (B_neigs > nmax) { 222 if (pcbddc->dbg_flag) { 223 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 224 } 225 eigs_start = B_neigs -nmax; 226 B_neigs = nmax; 227 } 228 229 nmin_s = PetscMin(nmin,B_N); 230 if (B_neigs < nmin_s) { 231 PetscBLASInt B_neigs2; 232 233 B_IU = B_N - B_neigs; 234 B_IL = B_N - nmin_s + 1; 235 if (pcbddc->dbg_flag) { 236 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 237 } 238 if (sub_schurs->is_hermitian) { 239 PetscInt j; 240 for (j=0;j<subset_size;j++) { 241 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 242 } 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 } else { 247 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 248 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 249 } 250 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 251 #if defined(PETSC_USE_COMPLEX) 252 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 253 #else 254 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 255 #endif 256 ierr = PetscFPTrapPop();CHKERRQ(ierr); 257 B_neigs += B_neigs2; 258 } 259 if (B_ierr) { 260 if (B_ierr < 0 ) { 261 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 262 } else if (B_ierr <= B_N) { 263 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 264 } else { 265 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 266 } 267 } 268 if (pcbddc->dbg_flag) { 269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 270 for (j=0;j<B_neigs;j++) { 271 if (eigs[j] == 0.0) { 272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 273 } else { 274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 275 } 276 } 277 } 278 } else { 279 /* TODO */ 280 } 281 } 282 maxneigs = PetscMax(B_neigs,maxneigs); 283 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 284 if (B_neigs) { 285 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 286 287 if (pcbddc->dbg_flag > 1) { 288 PetscInt ii; 289 for (ii=0;ii<B_neigs;ii++) { 290 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 291 for (j=0;j<B_N;j++) { 292 #if defined(PETSC_USE_COMPLEX) 293 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 294 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 295 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 296 #else 297 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 298 #endif 299 } 300 } 301 } 302 #if 0 303 for (j=0;j<B_neigs;j++) { 304 PetscBLASInt Blas_N,Blas_one = 1.0; 305 PetscScalar norm; 306 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 307 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 308 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 309 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 310 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 311 } else { 312 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 313 } 314 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 315 } 316 #endif 317 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 318 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 319 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 320 cum++; 321 } 322 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 323 /* shift for next computation */ 324 cumarray += subset_size*subset_size; 325 } 326 if (pcbddc->dbg_flag) { 327 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 328 } 329 330 if (mss) { 331 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 332 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 333 /* destroy matrices (junk) */ 334 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 335 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 336 } 337 if (allocated_S_St) { 338 ierr = PetscFree2(S,St);CHKERRQ(ierr); 339 } 340 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 341 #if defined(PETSC_USE_COMPLEX) 342 ierr = PetscFree(rwork);CHKERRQ(ierr); 343 #endif 344 if (pcbddc->dbg_flag) { 345 PetscInt maxneigs_r; 346 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 347 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 348 } 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "PCBDDCSetUpSolvers" 354 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 355 { 356 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 357 PetscScalar *coarse_submat_vals; 358 PetscErrorCode ierr; 359 360 PetscFunctionBegin; 361 /* Setup local scatters R_to_B and (optionally) R_to_D */ 362 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 363 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 364 365 /* Setup local neumann solver ksp_R */ 366 /* PCBDDCSetUpLocalScatters should be called first! */ 367 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 368 369 /* Change global null space passed in by the user if change of basis has been requested */ 370 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 371 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 372 } 373 374 /* 375 Setup local correction and local part of coarse basis. 376 Gives back the dense local part of the coarse matrix in column major ordering 377 */ 378 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 379 380 /* Compute total number of coarse nodes and setup coarse solver */ 381 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 382 383 /* free */ 384 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCResetCustomization" 390 PetscErrorCode PCBDDCResetCustomization(PC pc) 391 { 392 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 397 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 398 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 399 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 401 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 402 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 403 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 405 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 406 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "PCBDDCResetTopography" 412 PetscErrorCode PCBDDCResetTopography(PC pc) 413 { 414 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 419 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 421 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 422 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "PCBDDCResetSolvers" 428 PetscErrorCode PCBDDCResetSolvers(PC pc) 429 { 430 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 431 PetscScalar *array; 432 PetscErrorCode ierr; 433 434 PetscFunctionBegin; 435 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 436 if (pcbddc->coarse_phi_B) { 437 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 438 ierr = PetscFree(array);CHKERRQ(ierr); 439 } 440 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 442 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 443 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 444 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 445 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 447 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 449 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 450 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 451 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 452 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 453 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 454 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 455 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 456 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 457 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 458 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 459 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 460 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 461 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 462 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 468 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 469 { 470 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 471 PC_IS *pcis = (PC_IS*)pc->data; 472 VecType impVecType; 473 PetscInt n_constraints,n_R,old_size; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 if (!pcbddc->ConstraintMatrix) { 478 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 479 } 480 /* get sizes */ 481 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 482 n_R = pcis->n-pcbddc->n_vertices; 483 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 484 /* local work vectors (try to avoid unneeded work)*/ 485 /* R nodes */ 486 old_size = -1; 487 if (pcbddc->vec1_R) { 488 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 489 } 490 if (n_R != old_size) { 491 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 492 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 493 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 494 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 495 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 496 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 497 } 498 /* local primal dofs */ 499 old_size = -1; 500 if (pcbddc->vec1_P) { 501 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 502 } 503 if (pcbddc->local_primal_size != old_size) { 504 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 505 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 506 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 507 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 508 } 509 /* local explicit constraints */ 510 old_size = -1; 511 if (pcbddc->vec1_C) { 512 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 513 } 514 if (n_constraints && n_constraints != old_size) { 515 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 516 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 517 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 518 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 519 } 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "PCBDDCSetUpCorrection" 525 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 526 { 527 PetscErrorCode ierr; 528 /* pointers to pcis and pcbddc */ 529 PC_IS* pcis = (PC_IS*)pc->data; 530 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 531 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 532 /* submatrices of local problem */ 533 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 534 /* submatrices of local coarse problem */ 535 Mat S_VV,S_CV,S_VC,S_CC; 536 /* working matrices */ 537 Mat C_CR; 538 /* additional working stuff */ 539 PC pc_R; 540 Mat F; 541 PetscBool isLU,isCHOL,isILU; 542 543 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 544 PetscScalar *work; 545 PetscInt *idx_V_B; 546 PetscInt n,n_vertices,n_constraints; 547 PetscInt i,n_R,n_D,n_B; 548 PetscBool unsymmetric_check; 549 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 550 MatType impMatType; 551 /* some shortcuts to scalars */ 552 PetscScalar one=1.0,m_one=-1.0; 553 554 PetscFunctionBegin; 555 n_vertices = pcbddc->n_vertices; 556 n_constraints = pcbddc->local_primal_size-n_vertices; 557 /* Set Non-overlapping dimensions */ 558 n_B = pcis->n_B; 559 n_D = pcis->n - n_B; 560 n_R = pcis->n - n_vertices; 561 562 /* Set types for local objects needed by BDDC precondtioner */ 563 impMatType = MATSEQDENSE; 564 565 /* vertices in boundary numbering */ 566 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 567 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 568 if (i != n_vertices) { 569 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 570 } 571 572 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 573 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 575 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 576 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 577 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 578 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 579 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 581 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 582 583 unsymmetric_check = PETSC_FALSE; 584 /* allocate workspace */ 585 n = 0; 586 if (n_constraints) { 587 n += n_R*n_constraints; 588 } 589 if (n_vertices) { 590 n = PetscMax(2*n_R*n_vertices,n); 591 n = PetscMax((n_R+n_B)*n_vertices,n); 592 } 593 if (!pcbddc->symmetric_primal) { 594 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 595 unsymmetric_check = PETSC_TRUE; 596 } 597 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 598 599 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 600 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 601 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 602 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 603 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 604 if (isLU || isILU || isCHOL) { 605 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 606 } else if (sub_schurs->reuse_mumps) { 607 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 608 MatFactorType type; 609 610 F = reuse_mumps->F; 611 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 612 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 613 } else { 614 F = NULL; 615 } 616 617 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 618 if (n_constraints) { 619 Mat M1,M2,M3; 620 Mat auxmat; 621 IS is_aux; 622 PetscScalar *array,*array2; 623 624 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 625 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 626 627 /* Extract constraints on R nodes: C_{CR} */ 628 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 629 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 630 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 631 632 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 633 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 634 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 635 for (i=0;i<n_constraints;i++) { 636 const PetscScalar *row_cmat_values; 637 const PetscInt *row_cmat_indices; 638 PetscInt size_of_constraint,j; 639 640 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 641 for (j=0;j<size_of_constraint;j++) { 642 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 643 } 644 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 645 } 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 647 if (F) { 648 Mat B; 649 650 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 651 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 652 ierr = MatDestroy(&B);CHKERRQ(ierr); 653 } else { 654 PetscScalar *marr; 655 656 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 660 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 661 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 662 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 663 } 664 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 665 } 666 if (!pcbddc->switch_static) { 667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 668 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 669 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 670 for (i=0;i<n_constraints;i++) { 671 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 672 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 673 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 674 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 675 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 676 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 677 } 678 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 679 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 680 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 681 } else { 682 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 683 pcbddc->local_auxmat2 = local_auxmat2_R; 684 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 685 } 686 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 687 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 688 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 689 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 690 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 691 if (isCHOL) { 692 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 693 } else { 694 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 695 } 696 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 697 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 698 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 699 ierr = MatDestroy(&M2);CHKERRQ(ierr); 700 ierr = MatDestroy(&M3);CHKERRQ(ierr); 701 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 702 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 703 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 704 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 705 ierr = MatDestroy(&M1);CHKERRQ(ierr); 706 } 707 /* Get submatrices from subdomain matrix */ 708 if (n_vertices) { 709 IS is_aux; 710 711 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 712 IS tis; 713 714 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 715 ierr = ISSort(tis);CHKERRQ(ierr); 716 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 717 ierr = ISDestroy(&tis);CHKERRQ(ierr); 718 } else { 719 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 720 } 721 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 722 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 723 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 724 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 725 } 726 727 /* Matrix of coarse basis functions (local) */ 728 if (pcbddc->coarse_phi_B) { 729 PetscInt on_B,on_primal,on_D=n_D; 730 if (pcbddc->coarse_phi_D) { 731 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 732 } 733 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 734 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 735 PetscScalar *marray; 736 737 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 738 ierr = PetscFree(marray);CHKERRQ(ierr); 739 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 740 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 741 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 743 } 744 } 745 746 if (!pcbddc->coarse_phi_B) { 747 PetscScalar *marray; 748 749 n = n_B*pcbddc->local_primal_size; 750 if (pcbddc->switch_static || pcbddc->dbg_flag) { 751 n += n_D*pcbddc->local_primal_size; 752 } 753 if (!pcbddc->symmetric_primal) { 754 n *= 2; 755 } 756 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 757 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 758 n = n_B*pcbddc->local_primal_size; 759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 760 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 761 n += n_D*pcbddc->local_primal_size; 762 } 763 if (!pcbddc->symmetric_primal) { 764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 765 if (pcbddc->switch_static || pcbddc->dbg_flag) { 766 n = n_B*pcbddc->local_primal_size; 767 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 768 } 769 } else { 770 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 771 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 772 if (pcbddc->switch_static || pcbddc->dbg_flag) { 773 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 774 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 775 } 776 } 777 } 778 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 779 /* vertices */ 780 if (n_vertices) { 781 782 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 783 784 if (n_R) { 785 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 786 PetscBLASInt B_N,B_one = 1; 787 PetscScalar *x,*y; 788 PetscBool isseqaij; 789 790 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 791 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 792 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 793 if (F) { /* TODO could be optimized for symmetric problems */ 794 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 795 } else { 796 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 797 for (i=0;i<n_vertices;i++) { 798 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 799 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 800 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 801 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 802 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 803 } 804 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 805 } 806 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 807 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 808 if (n_constraints) { 809 Mat B; 810 811 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 812 for (i=0;i<n_vertices;i++) { 813 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 814 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 815 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 816 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 817 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 818 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 819 } 820 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 821 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 822 ierr = MatDestroy(&B);CHKERRQ(ierr); 823 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 824 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 825 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 826 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 827 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 828 ierr = MatDestroy(&B);CHKERRQ(ierr); 829 } 830 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 831 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 832 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 833 } 834 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 835 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 836 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 837 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 838 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 839 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 840 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 841 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 842 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 843 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 844 } else { 845 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 846 } 847 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 848 /* coarse basis functions */ 849 for (i=0;i<n_vertices;i++) { 850 PetscScalar *y; 851 852 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 853 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 854 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 855 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 857 y[n_B*i+idx_V_B[i]] = 1.0; 858 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 859 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 860 861 if (pcbddc->switch_static || pcbddc->dbg_flag) { 862 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 863 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 864 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 866 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 867 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 868 } 869 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 870 } 871 /* if n_R == 0 the object is not destroyed */ 872 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 873 } 874 875 if (n_constraints) { 876 Mat B; 877 878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 879 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 880 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 881 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 882 if (n_vertices) { 883 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 884 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 885 } else { 886 Mat S_VCt; 887 888 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 889 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 890 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 891 } 892 } 893 ierr = MatDestroy(&B);CHKERRQ(ierr); 894 /* coarse basis functions */ 895 for (i=0;i<n_constraints;i++) { 896 PetscScalar *y; 897 898 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 899 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 900 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 901 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 902 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 904 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 905 if (pcbddc->switch_static || pcbddc->dbg_flag) { 906 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 907 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 908 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 910 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 911 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 912 } 913 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 914 } 915 } 916 if (n_constraints) { 917 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 918 } 919 920 /* compute other basis functions for non-symmetric problems */ 921 if (!pcbddc->symmetric_primal) { 922 923 if (n_constraints) { 924 Mat S_CCT,B_C; 925 926 /* this is a lazy thing */ 927 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 928 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 929 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 930 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 931 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 932 if (n_vertices) { 933 Mat B_V,S_VCT; 934 935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 936 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 937 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 938 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 939 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 940 } 941 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 942 } else { /* if there are no constraints, reset work */ 943 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 944 } 945 if (n_vertices && n_R) { 946 Mat A_VRT; 947 PetscScalar *marray; 948 PetscBLASInt B_N,B_one = 1; 949 950 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 951 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 952 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 953 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 954 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 955 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 956 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 957 } 958 959 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 960 for (i=0;i<pcbddc->local_primal_size;i++) { 961 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 962 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 963 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 964 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 965 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 966 } 967 } else { 968 for (i=0;i<pcbddc->local_primal_size;i++) { 969 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 970 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 971 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 972 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 973 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 974 } 975 } 976 /* coarse basis functions */ 977 for (i=0;i<pcbddc->local_primal_size;i++) { 978 PetscScalar *y; 979 980 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 981 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 982 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 983 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985 if (i<n_vertices) { 986 y[n_B*i+idx_V_B[i]] = 1.0; 987 } 988 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 989 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 990 991 if (pcbddc->switch_static || pcbddc->dbg_flag) { 992 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 993 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 994 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 995 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 996 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 997 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 998 } 999 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1000 } 1001 } 1002 /* free memory */ 1003 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1004 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1005 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1006 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1007 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1008 ierr = PetscFree(work);CHKERRQ(ierr); 1009 if (n_vertices) { 1010 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1011 } 1012 if (n_constraints) { 1013 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1014 } 1015 /* Checking coarse_sub_mat and coarse basis functios */ 1016 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1017 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1018 if (pcbddc->dbg_flag) { 1019 Mat coarse_sub_mat; 1020 Mat AUXMAT,TM1,TM2,TM3,TM4; 1021 Mat coarse_phi_D,coarse_phi_B; 1022 Mat coarse_psi_D,coarse_psi_B; 1023 Mat A_II,A_BB,A_IB,A_BI; 1024 Mat C_B,CPHI; 1025 IS is_dummy; 1026 Vec mones; 1027 MatType checkmattype=MATSEQAIJ; 1028 PetscReal real_value; 1029 1030 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1031 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1032 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1033 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1034 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1035 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1036 if (unsymmetric_check) { 1037 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1038 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1039 } 1040 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1041 1042 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1043 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1044 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1045 if (unsymmetric_check) { 1046 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1047 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1048 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1049 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1050 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1051 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 } else { 1059 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1060 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1061 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1063 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1064 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1066 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1067 } 1068 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1069 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1070 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1071 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1072 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1073 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1074 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1075 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1076 1077 /* check constraints */ 1078 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1079 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1080 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1081 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1082 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1083 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1084 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1085 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1086 if (unsymmetric_check) { 1087 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1088 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1089 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1090 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1092 } 1093 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1094 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1095 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1096 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1097 1098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1099 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1100 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1101 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1102 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1103 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1104 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1105 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1106 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1107 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1108 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1109 if (unsymmetric_check) { 1110 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1111 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1112 } 1113 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1114 } 1115 /* get back data */ 1116 *coarse_submat_vals_n = coarse_submat_vals; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1122 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1123 { 1124 Mat *work_mat; 1125 IS isrow_s,iscol_s; 1126 PetscBool rsorted,csorted; 1127 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1132 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1133 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1134 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1135 1136 if (!rsorted) { 1137 const PetscInt *idxs; 1138 PetscInt *idxs_sorted,i; 1139 1140 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1142 for (i=0;i<rsize;i++) { 1143 idxs_perm_r[i] = i; 1144 } 1145 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1146 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1147 for (i=0;i<rsize;i++) { 1148 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1149 } 1150 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1151 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1152 } else { 1153 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1154 isrow_s = isrow; 1155 } 1156 1157 if (!csorted) { 1158 if (isrow == iscol) { 1159 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1160 iscol_s = isrow_s; 1161 } else { 1162 const PetscInt *idxs; 1163 PetscInt *idxs_sorted,i; 1164 1165 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1166 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1167 for (i=0;i<csize;i++) { 1168 idxs_perm_c[i] = i; 1169 } 1170 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1171 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1172 for (i=0;i<csize;i++) { 1173 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1174 } 1175 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1176 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1177 } 1178 } else { 1179 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1180 iscol_s = iscol; 1181 } 1182 1183 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1184 1185 if (!rsorted || !csorted) { 1186 Mat new_mat; 1187 IS is_perm_r,is_perm_c; 1188 1189 if (!rsorted) { 1190 PetscInt *idxs_r,i; 1191 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1192 for (i=0;i<rsize;i++) { 1193 idxs_r[idxs_perm_r[i]] = i; 1194 } 1195 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1196 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1197 } else { 1198 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1199 } 1200 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1201 1202 if (!csorted) { 1203 if (isrow_s == iscol_s) { 1204 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1205 is_perm_c = is_perm_r; 1206 } else { 1207 PetscInt *idxs_c,i; 1208 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1209 for (i=0;i<csize;i++) { 1210 idxs_c[idxs_perm_c[i]] = i; 1211 } 1212 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1213 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1214 } 1215 } else { 1216 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1217 } 1218 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1219 1220 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1221 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1222 work_mat[0] = new_mat; 1223 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1224 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1225 } 1226 1227 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1228 *B = work_mat[0]; 1229 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1230 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1231 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1237 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1238 { 1239 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1240 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1241 Mat new_mat; 1242 IS is_local,is_global; 1243 PetscInt local_size; 1244 PetscBool isseqaij; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1249 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1250 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1251 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1252 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1253 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1254 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1255 1256 /* check */ 1257 if (pcbddc->dbg_flag) { 1258 Vec x,x_change; 1259 PetscReal error; 1260 1261 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1262 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1263 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1264 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1267 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1269 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1270 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1271 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1272 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1273 ierr = VecDestroy(&x);CHKERRQ(ierr); 1274 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1275 } 1276 1277 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1278 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1279 if (isseqaij) { 1280 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1281 } else { 1282 Mat work_mat; 1283 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1284 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1285 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1286 } 1287 if (matis->A->symmetric_set) { 1288 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1289 #if !defined(PETSC_USE_COMPLEX) 1290 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1291 #endif 1292 } 1293 /* 1294 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1295 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1296 */ 1297 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1298 PetscFunctionReturn(0); 1299 } 1300 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1303 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1304 { 1305 PC_IS* pcis = (PC_IS*)(pc->data); 1306 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1307 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1308 PetscInt *idx_R_local=NULL; 1309 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1310 PetscInt vbs,bs; 1311 PetscBT bitmask=NULL; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 /* 1316 No need to setup local scatters if 1317 - primal space is unchanged 1318 AND 1319 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1320 AND 1321 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1322 */ 1323 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1324 PetscFunctionReturn(0); 1325 } 1326 /* destroy old objects */ 1327 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1328 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1329 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1330 /* Set Non-overlapping dimensions */ 1331 n_B = pcis->n_B; 1332 n_D = pcis->n - n_B; 1333 n_vertices = pcbddc->n_vertices; 1334 1335 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1336 1337 /* create auxiliary bitmask and allocate workspace */ 1338 if (!sub_schurs->reuse_mumps) { 1339 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1340 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1341 for (i=0;i<n_vertices;i++) { 1342 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1343 } 1344 1345 for (i=0, n_R=0; i<pcis->n; i++) { 1346 if (!PetscBTLookup(bitmask,i)) { 1347 idx_R_local[n_R++] = i; 1348 } 1349 } 1350 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1351 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1352 1353 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1354 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1355 } 1356 1357 /* Block code */ 1358 vbs = 1; 1359 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1360 if (bs>1 && !(n_vertices%bs)) { 1361 PetscBool is_blocked = PETSC_TRUE; 1362 PetscInt *vary; 1363 if (!sub_schurs->reuse_mumps) { 1364 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1365 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1366 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1367 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1368 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1369 for (i=0; i<pcis->n/bs; i++) { 1370 if (vary[i]!=0 && vary[i]!=bs) { 1371 is_blocked = PETSC_FALSE; 1372 break; 1373 } 1374 } 1375 ierr = PetscFree(vary);CHKERRQ(ierr); 1376 } else { 1377 /* Verify directly the R set */ 1378 for (i=0; i<n_R/bs; i++) { 1379 PetscInt j,node=idx_R_local[bs*i]; 1380 for (j=1; j<bs; j++) { 1381 if (node != idx_R_local[bs*i+j]-j) { 1382 is_blocked = PETSC_FALSE; 1383 break; 1384 } 1385 } 1386 } 1387 } 1388 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1389 vbs = bs; 1390 for (i=0;i<n_R/vbs;i++) { 1391 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1392 } 1393 } 1394 } 1395 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1396 if (sub_schurs->reuse_mumps) { 1397 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1398 1399 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1400 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1401 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1402 reuse_mumps->is_R = pcbddc->is_R_local; 1403 } else { 1404 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1405 } 1406 1407 /* print some info if requested */ 1408 if (pcbddc->dbg_flag) { 1409 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1410 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1411 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1414 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1415 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1416 } 1417 1418 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1419 if (!sub_schurs->reuse_mumps) { 1420 IS is_aux1,is_aux2; 1421 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1422 1423 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1424 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1425 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1426 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1427 for (i=0; i<n_D; i++) { 1428 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1429 } 1430 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1431 for (i=0, j=0; i<n_R; i++) { 1432 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1433 aux_array1[j++] = i; 1434 } 1435 } 1436 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1437 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1438 for (i=0, j=0; i<n_B; i++) { 1439 if (!PetscBTLookup(bitmask,is_indices[i])) { 1440 aux_array2[j++] = i; 1441 } 1442 } 1443 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1444 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1445 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1446 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1447 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1448 1449 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1450 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1451 for (i=0, j=0; i<n_R; i++) { 1452 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1453 aux_array1[j++] = i; 1454 } 1455 } 1456 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1457 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1458 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1459 } 1460 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1461 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1462 } else { 1463 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1464 IS tis; 1465 PetscInt schur_size; 1466 1467 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1468 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1469 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1470 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1471 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1472 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1473 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1474 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1475 } 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1483 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1484 { 1485 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1486 PC_IS *pcis = (PC_IS*)pc->data; 1487 PC pc_temp; 1488 Mat A_RR; 1489 MatReuse reuse; 1490 PetscScalar m_one = -1.0; 1491 PetscReal value; 1492 PetscInt n_D,n_R; 1493 PetscBool use_exact,use_exact_reduced,issbaij; 1494 PetscErrorCode ierr; 1495 /* prefixes stuff */ 1496 char dir_prefix[256],neu_prefix[256],str_level[16]; 1497 size_t len; 1498 1499 PetscFunctionBegin; 1500 1501 /* compute prefixes */ 1502 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1503 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1504 if (!pcbddc->current_level) { 1505 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1506 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1507 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1508 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1509 } else { 1510 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1511 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1512 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1513 len -= 15; /* remove "pc_bddc_coarse_" */ 1514 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1515 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1516 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1517 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1518 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1519 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1520 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1521 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1522 } 1523 1524 /* DIRICHLET PROBLEM */ 1525 if (dirichlet) { 1526 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1527 if (pcbddc->local_mat->symmetric_set) { 1528 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1529 } 1530 /* Matrix for Dirichlet problem is pcis->A_II */ 1531 n_D = pcis->n - pcis->n_B; 1532 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1533 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1534 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1535 /* default */ 1536 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1537 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1538 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1539 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1540 if (issbaij) { 1541 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1542 } else { 1543 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1544 } 1545 /* Allow user's customization */ 1546 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1547 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1548 } 1549 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1550 if (sub_schurs->reuse_mumps) { 1551 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1552 1553 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1554 } 1555 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1556 if (!n_D) { 1557 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1558 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1559 } 1560 /* Set Up KSP for Dirichlet problem of BDDC */ 1561 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1562 /* set ksp_D into pcis data */ 1563 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1564 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1565 pcis->ksp_D = pcbddc->ksp_D; 1566 } 1567 1568 /* NEUMANN PROBLEM */ 1569 A_RR = 0; 1570 if (neumann) { 1571 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1572 PetscInt ibs,mbs; 1573 PetscBool issbaij; 1574 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1575 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1576 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1577 if (pcbddc->ksp_R) { /* already created ksp */ 1578 PetscInt nn_R; 1579 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1580 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1581 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1582 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1583 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1584 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1585 reuse = MAT_INITIAL_MATRIX; 1586 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1587 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1588 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1589 reuse = MAT_INITIAL_MATRIX; 1590 } else { /* safe to reuse the matrix */ 1591 reuse = MAT_REUSE_MATRIX; 1592 } 1593 } 1594 /* last check */ 1595 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1596 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1597 reuse = MAT_INITIAL_MATRIX; 1598 } 1599 } else { /* first time, so we need to create the matrix */ 1600 reuse = MAT_INITIAL_MATRIX; 1601 } 1602 /* extract A_RR */ 1603 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1604 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1605 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1606 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1607 if (matis->A == pcbddc->local_mat) { 1608 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1609 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1610 } else { 1611 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1612 } 1613 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1614 if (matis->A == pcbddc->local_mat) { 1615 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1616 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1617 } else { 1618 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1619 } 1620 } 1621 if (!sub_schurs->reuse_mumps) { 1622 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1623 if (pcbddc->local_mat->symmetric_set) { 1624 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1625 } 1626 } else { 1627 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1628 1629 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1630 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1631 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1632 } 1633 if (!pcbddc->ksp_R) { /* create object if not present */ 1634 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1635 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1636 /* default */ 1637 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1638 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1639 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1640 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1641 if (issbaij) { 1642 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1643 } else { 1644 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1645 } 1646 /* Allow user's customization */ 1647 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1648 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1649 } 1650 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1651 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1652 if (!n_R) { 1653 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1654 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1655 } 1656 /* Reuse MUMPS solver if it is present */ 1657 if (sub_schurs->reuse_mumps) { 1658 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1659 1660 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1661 } 1662 /* Set Up KSP for Neumann problem of BDDC */ 1663 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1664 } 1665 /* free Neumann problem's matrix */ 1666 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1667 1668 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1669 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1670 if (pcbddc->dbg_flag) { 1671 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1672 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1673 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1674 } 1675 if (dirichlet) { /* Dirichlet */ 1676 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1677 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1678 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1679 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1680 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1681 /* need to be adapted? */ 1682 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1683 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1684 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1685 /* print info */ 1686 if (pcbddc->dbg_flag) { 1687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1688 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1689 } 1690 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1691 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1692 } 1693 } 1694 if (neumann) { /* Neumann */ 1695 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1696 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1697 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1698 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1699 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1700 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1701 /* need to be adapted? */ 1702 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1703 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1704 /* print info */ 1705 if (pcbddc->dbg_flag) { 1706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1707 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1708 } 1709 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1710 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1711 } 1712 } 1713 } 1714 PetscFunctionReturn(0); 1715 } 1716 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1719 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1720 { 1721 PetscErrorCode ierr; 1722 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1723 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1724 1725 PetscFunctionBegin; 1726 if (!sub_schurs->reuse_mumps) { 1727 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1728 } 1729 if (!pcbddc->switch_static) { 1730 if (applytranspose && pcbddc->local_auxmat1) { 1731 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1732 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1733 } 1734 if (!sub_schurs->reuse_mumps) { 1735 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1736 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1737 } else { 1738 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1739 1740 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1741 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1742 } 1743 } else { 1744 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1745 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1746 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1747 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1748 if (applytranspose && pcbddc->local_auxmat1) { 1749 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1750 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1751 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1752 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1753 } 1754 } 1755 if (!sub_schurs->reuse_mumps) { 1756 if (applytranspose) { 1757 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1758 } else { 1759 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1760 } 1761 #if defined(PETSC_HAVE_MUMPS) 1762 } else { 1763 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1764 1765 if (applytranspose) { 1766 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1767 } else { 1768 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1769 } 1770 #endif 1771 } 1772 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1773 if (!pcbddc->switch_static) { 1774 if (!sub_schurs->reuse_mumps) { 1775 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1776 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 } else { 1778 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1779 1780 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1781 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1782 } 1783 if (!applytranspose && pcbddc->local_auxmat1) { 1784 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1785 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1786 } 1787 } else { 1788 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1789 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1790 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1791 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1792 if (!applytranspose && pcbddc->local_auxmat1) { 1793 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1794 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1795 } 1796 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1797 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1799 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1807 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1808 { 1809 PetscErrorCode ierr; 1810 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1811 PC_IS* pcis = (PC_IS*) (pc->data); 1812 const PetscScalar zero = 0.0; 1813 1814 PetscFunctionBegin; 1815 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1816 if (applytranspose) { 1817 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1818 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1819 } else { 1820 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1821 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1822 } 1823 /* start communications from local primal nodes to rhs of coarse solver */ 1824 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1825 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1826 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1827 1828 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1829 /* TODO remove null space when doing multilevel */ 1830 if (pcbddc->coarse_ksp) { 1831 Vec rhs,sol; 1832 1833 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1834 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1835 if (applytranspose) { 1836 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1837 } else { 1838 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1839 } 1840 } 1841 1842 /* Local solution on R nodes */ 1843 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1844 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1845 } 1846 1847 /* communications from coarse sol to local primal nodes */ 1848 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1849 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1850 1851 /* Sum contributions from two levels */ 1852 if (applytranspose) { 1853 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1854 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1855 } else { 1856 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1857 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1858 } 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1864 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1865 { 1866 PetscErrorCode ierr; 1867 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1868 PetscScalar *array; 1869 Vec from,to; 1870 1871 PetscFunctionBegin; 1872 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1873 from = pcbddc->coarse_vec; 1874 to = pcbddc->vec1_P; 1875 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1876 Vec tvec; 1877 1878 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1879 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1880 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1881 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1882 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1883 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1884 } 1885 } else { /* from local to global -> put data in coarse right hand side */ 1886 from = pcbddc->vec1_P; 1887 to = pcbddc->coarse_vec; 1888 } 1889 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1895 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1896 { 1897 PetscErrorCode ierr; 1898 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1899 PetscScalar *array; 1900 Vec from,to; 1901 1902 PetscFunctionBegin; 1903 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1904 from = pcbddc->coarse_vec; 1905 to = pcbddc->vec1_P; 1906 } else { /* from local to global -> put data in coarse right hand side */ 1907 from = pcbddc->vec1_P; 1908 to = pcbddc->coarse_vec; 1909 } 1910 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1911 if (smode == SCATTER_FORWARD) { 1912 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1913 Vec tvec; 1914 1915 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1916 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1917 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1918 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1919 } 1920 } else { 1921 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1922 ierr = VecResetArray(from);CHKERRQ(ierr); 1923 } 1924 } 1925 PetscFunctionReturn(0); 1926 } 1927 1928 /* uncomment for testing purposes */ 1929 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1932 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1933 { 1934 PetscErrorCode ierr; 1935 PC_IS* pcis = (PC_IS*)(pc->data); 1936 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1937 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1938 /* one and zero */ 1939 PetscScalar one=1.0,zero=0.0; 1940 /* space to store constraints and their local indices */ 1941 PetscScalar *constraints_data; 1942 PetscInt *constraints_idxs,*constraints_idxs_B; 1943 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1944 PetscInt *constraints_n; 1945 /* iterators */ 1946 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1947 /* BLAS integers */ 1948 PetscBLASInt lwork,lierr; 1949 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1950 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1951 /* reuse */ 1952 PetscInt olocal_primal_size,olocal_primal_size_cc; 1953 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1954 /* change of basis */ 1955 PetscBool qr_needed; 1956 PetscBT change_basis,qr_needed_idx; 1957 /* auxiliary stuff */ 1958 PetscInt *nnz,*is_indices; 1959 PetscInt ncc; 1960 /* some quantities */ 1961 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1962 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1963 1964 PetscFunctionBegin; 1965 /* Destroy Mat objects computed previously */ 1966 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1967 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1968 /* save info on constraints from previous setup (if any) */ 1969 olocal_primal_size = pcbddc->local_primal_size; 1970 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1971 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1972 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1973 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1974 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1975 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1976 1977 /* print some info */ 1978 if (pcbddc->dbg_flag) { 1979 IS vertices; 1980 PetscInt nv,nedges,nfaces; 1981 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1982 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1983 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1984 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1985 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1986 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1987 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1990 } 1991 1992 if (!pcbddc->adaptive_selection) { 1993 IS ISForVertices,*ISForFaces,*ISForEdges; 1994 MatNullSpace nearnullsp; 1995 const Vec *nearnullvecs; 1996 Vec *localnearnullsp; 1997 PetscScalar *array; 1998 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1999 PetscBool nnsp_has_cnst; 2000 /* LAPACK working arrays for SVD or POD */ 2001 PetscBool skip_lapack,boolforchange; 2002 PetscScalar *work; 2003 PetscReal *singular_vals; 2004 #if defined(PETSC_USE_COMPLEX) 2005 PetscReal *rwork; 2006 #endif 2007 #if defined(PETSC_MISSING_LAPACK_GESVD) 2008 PetscScalar *temp_basis,*correlation_mat; 2009 #else 2010 PetscBLASInt dummy_int=1; 2011 PetscScalar dummy_scalar=1.; 2012 #endif 2013 2014 /* Get index sets for faces, edges and vertices from graph */ 2015 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2016 /* free unneeded index sets */ 2017 if (!pcbddc->use_vertices) { 2018 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2019 } 2020 if (!pcbddc->use_edges) { 2021 for (i=0;i<n_ISForEdges;i++) { 2022 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2023 } 2024 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2025 n_ISForEdges = 0; 2026 } 2027 if (!pcbddc->use_faces) { 2028 for (i=0;i<n_ISForFaces;i++) { 2029 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2030 } 2031 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2032 n_ISForFaces = 0; 2033 } 2034 2035 #if defined(PETSC_USE_DEBUG) 2036 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2037 Also use_change_of_basis should be consistent among processors */ 2038 if (pcbddc->NullSpace) { 2039 PetscBool tbool[2],gbool[2]; 2040 2041 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2042 pcbddc->use_change_of_basis = PETSC_TRUE; 2043 if (!ISForEdges) { 2044 pcbddc->use_change_on_faces = PETSC_TRUE; 2045 } 2046 } 2047 tbool[0] = pcbddc->use_change_of_basis; 2048 tbool[1] = pcbddc->use_change_on_faces; 2049 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2050 pcbddc->use_change_of_basis = gbool[0]; 2051 pcbddc->use_change_on_faces = gbool[1]; 2052 } 2053 #endif 2054 2055 /* check if near null space is attached to global mat */ 2056 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2057 if (nearnullsp) { 2058 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2059 /* remove any stored info */ 2060 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2061 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2062 /* store information for BDDC solver reuse */ 2063 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2064 pcbddc->onearnullspace = nearnullsp; 2065 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2066 for (i=0;i<nnsp_size;i++) { 2067 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2068 } 2069 } else { /* if near null space is not provided BDDC uses constants by default */ 2070 nnsp_size = 0; 2071 nnsp_has_cnst = PETSC_TRUE; 2072 } 2073 /* get max number of constraints on a single cc */ 2074 max_constraints = nnsp_size; 2075 if (nnsp_has_cnst) max_constraints++; 2076 2077 /* 2078 Evaluate maximum storage size needed by the procedure 2079 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2080 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2081 There can be multiple constraints per connected component 2082 */ 2083 n_vertices = 0; 2084 if (ISForVertices) { 2085 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2086 } 2087 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2088 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2089 2090 total_counts = n_ISForFaces+n_ISForEdges; 2091 total_counts *= max_constraints; 2092 total_counts += n_vertices; 2093 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2094 2095 total_counts = 0; 2096 max_size_of_constraint = 0; 2097 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2098 IS used_is; 2099 if (i<n_ISForEdges) { 2100 used_is = ISForEdges[i]; 2101 } else { 2102 used_is = ISForFaces[i-n_ISForEdges]; 2103 } 2104 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2105 total_counts += j; 2106 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2107 } 2108 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2109 2110 /* get local part of global near null space vectors */ 2111 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2112 for (k=0;k<nnsp_size;k++) { 2113 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2114 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 } 2117 2118 /* whether or not to skip lapack calls */ 2119 skip_lapack = PETSC_TRUE; 2120 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2121 2122 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2123 if (!skip_lapack) { 2124 PetscScalar temp_work; 2125 2126 #if defined(PETSC_MISSING_LAPACK_GESVD) 2127 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2128 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2129 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2130 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2131 #if defined(PETSC_USE_COMPLEX) 2132 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2133 #endif 2134 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2135 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2136 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2137 lwork = -1; 2138 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2139 #if !defined(PETSC_USE_COMPLEX) 2140 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2141 #else 2142 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2143 #endif 2144 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2145 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2146 #else /* on missing GESVD */ 2147 /* SVD */ 2148 PetscInt max_n,min_n; 2149 max_n = max_size_of_constraint; 2150 min_n = max_constraints; 2151 if (max_size_of_constraint < max_constraints) { 2152 min_n = max_size_of_constraint; 2153 max_n = max_constraints; 2154 } 2155 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2156 #if defined(PETSC_USE_COMPLEX) 2157 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2158 #endif 2159 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2160 lwork = -1; 2161 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2162 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2163 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2164 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2165 #if !defined(PETSC_USE_COMPLEX) 2166 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2167 #else 2168 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2169 #endif 2170 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2171 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2172 #endif /* on missing GESVD */ 2173 /* Allocate optimal workspace */ 2174 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2175 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2176 } 2177 /* Now we can loop on constraining sets */ 2178 total_counts = 0; 2179 constraints_idxs_ptr[0] = 0; 2180 constraints_data_ptr[0] = 0; 2181 /* vertices */ 2182 if (n_vertices) { 2183 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2184 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2185 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2186 for (i=0;i<n_vertices;i++) { 2187 constraints_n[total_counts] = 1; 2188 constraints_data[total_counts] = 1.0; 2189 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2190 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2191 total_counts++; 2192 } 2193 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2194 PetscBool used_vertex; 2195 for (i=0;i<n_vertices;i++) { 2196 used_vertex = PETSC_FALSE; 2197 k = 0; 2198 while (!used_vertex && k<nnsp_size) { 2199 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2200 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2201 constraints_n[total_counts] = 1; 2202 constraints_idxs[total_counts] = is_indices[i]; 2203 constraints_data[total_counts] = 1.0; 2204 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2205 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2206 total_counts++; 2207 used_vertex = PETSC_TRUE; 2208 } 2209 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2210 k++; 2211 } 2212 } 2213 } 2214 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2215 n_vertices = total_counts; 2216 } 2217 2218 /* edges and faces */ 2219 total_counts_cc = total_counts; 2220 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2221 IS used_is; 2222 PetscBool idxs_copied = PETSC_FALSE; 2223 2224 if (ncc<n_ISForEdges) { 2225 used_is = ISForEdges[ncc]; 2226 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2227 } else { 2228 used_is = ISForFaces[ncc-n_ISForEdges]; 2229 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2230 } 2231 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2232 2233 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2234 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2235 /* change of basis should not be performed on local periodic nodes */ 2236 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2237 if (nnsp_has_cnst) { 2238 PetscScalar quad_value; 2239 2240 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2241 idxs_copied = PETSC_TRUE; 2242 2243 if (!pcbddc->use_nnsp_true) { 2244 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2245 } else { 2246 quad_value = 1.0; 2247 } 2248 for (j=0;j<size_of_constraint;j++) { 2249 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2250 } 2251 temp_constraints++; 2252 total_counts++; 2253 } 2254 for (k=0;k<nnsp_size;k++) { 2255 PetscReal real_value; 2256 PetscScalar *ptr_to_data; 2257 2258 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2259 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2260 for (j=0;j<size_of_constraint;j++) { 2261 ptr_to_data[j] = array[is_indices[j]]; 2262 } 2263 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2264 /* check if array is null on the connected component */ 2265 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2266 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2267 if (real_value > 0.0) { /* keep indices and values */ 2268 temp_constraints++; 2269 total_counts++; 2270 if (!idxs_copied) { 2271 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2272 idxs_copied = PETSC_TRUE; 2273 } 2274 } 2275 } 2276 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2277 valid_constraints = temp_constraints; 2278 if (!pcbddc->use_nnsp_true && temp_constraints) { 2279 if (temp_constraints == 1) { /* just normalize the constraint */ 2280 PetscScalar norm,*ptr_to_data; 2281 2282 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2283 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2284 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2285 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2286 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2287 } else { /* perform SVD */ 2288 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2289 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2290 2291 #if defined(PETSC_MISSING_LAPACK_GESVD) 2292 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2293 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2294 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2295 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2296 from that computed using LAPACKgesvd 2297 -> This is due to a different computation of eigenvectors in LAPACKheev 2298 -> The quality of the POD-computed basis will be the same */ 2299 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2300 /* Store upper triangular part of correlation matrix */ 2301 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2302 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2303 for (j=0;j<temp_constraints;j++) { 2304 for (k=0;k<j+1;k++) { 2305 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2306 } 2307 } 2308 /* compute eigenvalues and eigenvectors of correlation matrix */ 2309 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2310 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2311 #if !defined(PETSC_USE_COMPLEX) 2312 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2313 #else 2314 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2315 #endif 2316 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2317 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2318 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2319 j = 0; 2320 while (j < temp_constraints && singular_vals[j] < tol) j++; 2321 total_counts = total_counts-j; 2322 valid_constraints = temp_constraints-j; 2323 /* scale and copy POD basis into used quadrature memory */ 2324 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2325 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2326 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2327 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2330 if (j<temp_constraints) { 2331 PetscInt ii; 2332 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2333 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2334 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2335 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2336 for (k=0;k<temp_constraints-j;k++) { 2337 for (ii=0;ii<size_of_constraint;ii++) { 2338 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2339 } 2340 } 2341 } 2342 #else /* on missing GESVD */ 2343 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2344 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2345 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2346 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2347 #if !defined(PETSC_USE_COMPLEX) 2348 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2349 #else 2350 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2351 #endif 2352 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2353 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2354 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2355 k = temp_constraints; 2356 if (k > size_of_constraint) k = size_of_constraint; 2357 j = 0; 2358 while (j < k && singular_vals[k-j-1] < tol) j++; 2359 valid_constraints = k-j; 2360 total_counts = total_counts-temp_constraints+valid_constraints; 2361 #endif /* on missing GESVD */ 2362 } 2363 } 2364 /* update pointers information */ 2365 if (valid_constraints) { 2366 constraints_n[total_counts_cc] = valid_constraints; 2367 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2368 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2369 /* set change_of_basis flag */ 2370 if (boolforchange) { 2371 PetscBTSet(change_basis,total_counts_cc); 2372 } 2373 total_counts_cc++; 2374 } 2375 } 2376 /* free workspace */ 2377 if (!skip_lapack) { 2378 ierr = PetscFree(work);CHKERRQ(ierr); 2379 #if defined(PETSC_USE_COMPLEX) 2380 ierr = PetscFree(rwork);CHKERRQ(ierr); 2381 #endif 2382 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2383 #if defined(PETSC_MISSING_LAPACK_GESVD) 2384 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2385 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2386 #endif 2387 } 2388 for (k=0;k<nnsp_size;k++) { 2389 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2390 } 2391 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2392 /* free index sets of faces, edges and vertices */ 2393 for (i=0;i<n_ISForFaces;i++) { 2394 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2395 } 2396 if (n_ISForFaces) { 2397 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2398 } 2399 for (i=0;i<n_ISForEdges;i++) { 2400 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2401 } 2402 if (n_ISForEdges) { 2403 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2404 } 2405 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2406 } else { 2407 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2408 2409 total_counts = 0; 2410 n_vertices = 0; 2411 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2412 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2413 } 2414 max_constraints = 0; 2415 total_counts_cc = 0; 2416 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2417 total_counts += pcbddc->adaptive_constraints_n[i]; 2418 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2419 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2420 } 2421 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2422 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2423 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2424 constraints_data = pcbddc->adaptive_constraints_data; 2425 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2426 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2427 total_counts_cc = 0; 2428 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2429 if (pcbddc->adaptive_constraints_n[i]) { 2430 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2431 } 2432 } 2433 #if 0 2434 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2435 for (i=0;i<total_counts_cc;i++) { 2436 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2437 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2438 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2439 printf(" %d",constraints_idxs[j]); 2440 } 2441 printf("\n"); 2442 printf("number of cc: %d\n",constraints_n[i]); 2443 } 2444 for (i=0;i<n_vertices;i++) { 2445 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2446 } 2447 for (i=0;i<sub_schurs->n_subs;i++) { 2448 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2449 } 2450 #endif 2451 2452 max_size_of_constraint = 0; 2453 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2454 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2455 /* Change of basis */ 2456 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2457 if (pcbddc->use_change_of_basis) { 2458 for (i=0;i<sub_schurs->n_subs;i++) { 2459 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2460 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2461 } 2462 } 2463 } 2464 } 2465 pcbddc->local_primal_size = total_counts; 2466 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2467 2468 /* map constraints_idxs in boundary numbering */ 2469 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2470 if (i != constraints_idxs_ptr[total_counts_cc]) { 2471 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2472 } 2473 2474 /* Create constraint matrix */ 2475 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2476 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2477 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2478 2479 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2480 /* determine if a QR strategy is needed for change of basis */ 2481 qr_needed = PETSC_FALSE; 2482 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2483 total_primal_vertices=0; 2484 pcbddc->local_primal_size_cc = 0; 2485 for (i=0;i<total_counts_cc;i++) { 2486 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2487 if (size_of_constraint == 1) { 2488 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2489 pcbddc->local_primal_size_cc += 1; 2490 } else if (PetscBTLookup(change_basis,i)) { 2491 for (k=0;k<constraints_n[i];k++) { 2492 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2493 } 2494 pcbddc->local_primal_size_cc += constraints_n[i]; 2495 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2496 PetscBTSet(qr_needed_idx,i); 2497 qr_needed = PETSC_TRUE; 2498 } 2499 } else { 2500 pcbddc->local_primal_size_cc += 1; 2501 } 2502 } 2503 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2504 pcbddc->n_vertices = total_primal_vertices; 2505 /* permute indices in order to have a sorted set of vertices */ 2506 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2507 2508 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2509 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2510 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2511 2512 /* nonzero structure of constraint matrix */ 2513 /* and get reference dof for local constraints */ 2514 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2515 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2516 2517 j = total_primal_vertices; 2518 total_counts = total_primal_vertices; 2519 cum = total_primal_vertices; 2520 for (i=n_vertices;i<total_counts_cc;i++) { 2521 if (!PetscBTLookup(change_basis,i)) { 2522 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2523 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2524 cum++; 2525 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2526 for (k=0;k<constraints_n[i];k++) { 2527 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2528 nnz[j+k] = size_of_constraint; 2529 } 2530 j += constraints_n[i]; 2531 } 2532 } 2533 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2534 ierr = PetscFree(nnz);CHKERRQ(ierr); 2535 2536 /* set values in constraint matrix */ 2537 for (i=0;i<total_primal_vertices;i++) { 2538 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2539 } 2540 total_counts = total_primal_vertices; 2541 for (i=n_vertices;i<total_counts_cc;i++) { 2542 if (!PetscBTLookup(change_basis,i)) { 2543 PetscInt *cols; 2544 2545 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2546 cols = constraints_idxs+constraints_idxs_ptr[i]; 2547 for (k=0;k<constraints_n[i];k++) { 2548 PetscInt row = total_counts+k; 2549 PetscScalar *vals; 2550 2551 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2552 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2553 } 2554 total_counts += constraints_n[i]; 2555 } 2556 } 2557 /* assembling */ 2558 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2559 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2560 2561 /* 2562 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2563 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2564 */ 2565 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2566 if (pcbddc->use_change_of_basis) { 2567 /* dual and primal dofs on a single cc */ 2568 PetscInt dual_dofs,primal_dofs; 2569 /* working stuff for GEQRF */ 2570 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2571 PetscBLASInt lqr_work; 2572 /* working stuff for UNGQR */ 2573 PetscScalar *gqr_work,lgqr_work_t; 2574 PetscBLASInt lgqr_work; 2575 /* working stuff for TRTRS */ 2576 PetscScalar *trs_rhs; 2577 PetscBLASInt Blas_NRHS; 2578 /* pointers for values insertion into change of basis matrix */ 2579 PetscInt *start_rows,*start_cols; 2580 PetscScalar *start_vals; 2581 /* working stuff for values insertion */ 2582 PetscBT is_primal; 2583 PetscInt *aux_primal_numbering_B; 2584 /* matrix sizes */ 2585 PetscInt global_size,local_size; 2586 /* temporary change of basis */ 2587 Mat localChangeOfBasisMatrix; 2588 /* extra space for debugging */ 2589 PetscScalar *dbg_work; 2590 2591 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2592 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2593 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2594 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2595 /* nonzeros for local mat */ 2596 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2597 for (i=0;i<pcis->n;i++) nnz[i]=1; 2598 for (i=n_vertices;i<total_counts_cc;i++) { 2599 if (PetscBTLookup(change_basis,i)) { 2600 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2601 if (PetscBTLookup(qr_needed_idx,i)) { 2602 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2603 } else { 2604 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2605 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2606 } 2607 } 2608 } 2609 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2610 ierr = PetscFree(nnz);CHKERRQ(ierr); 2611 /* Set initial identity in the matrix */ 2612 for (i=0;i<pcis->n;i++) { 2613 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2614 } 2615 2616 if (pcbddc->dbg_flag) { 2617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2619 } 2620 2621 2622 /* Now we loop on the constraints which need a change of basis */ 2623 /* 2624 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2625 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2626 2627 Basic blocks of change of basis matrix T computed by 2628 2629 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2630 2631 | 1 0 ... 0 s_1/S | 2632 | 0 1 ... 0 s_2/S | 2633 | ... | 2634 | 0 ... 1 s_{n-1}/S | 2635 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2636 2637 with S = \sum_{i=1}^n s_i^2 2638 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2639 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2640 2641 - QR decomposition of constraints otherwise 2642 */ 2643 if (qr_needed) { 2644 /* space to store Q */ 2645 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2646 /* first we issue queries for optimal work */ 2647 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2648 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2649 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2650 lqr_work = -1; 2651 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2652 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2653 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2654 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2655 lgqr_work = -1; 2656 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2657 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2658 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2659 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2660 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2661 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2662 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2663 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2664 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2665 /* array to store scaling factors for reflectors */ 2666 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2667 /* array to store rhs and solution of triangular solver */ 2668 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2669 /* allocating workspace for check */ 2670 if (pcbddc->dbg_flag) { 2671 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2672 } 2673 } 2674 /* array to store whether a node is primal or not */ 2675 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2676 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2677 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2678 if (i != total_primal_vertices) { 2679 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2680 } 2681 for (i=0;i<total_primal_vertices;i++) { 2682 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2683 } 2684 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2685 2686 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2687 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2688 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2689 if (PetscBTLookup(change_basis,total_counts)) { 2690 /* get constraint info */ 2691 primal_dofs = constraints_n[total_counts]; 2692 dual_dofs = size_of_constraint-primal_dofs; 2693 2694 if (pcbddc->dbg_flag) { 2695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2696 } 2697 2698 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2699 2700 /* copy quadrature constraints for change of basis check */ 2701 if (pcbddc->dbg_flag) { 2702 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2703 } 2704 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2705 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2706 2707 /* compute QR decomposition of constraints */ 2708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2709 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2711 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2712 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2713 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2714 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2715 2716 /* explictly compute R^-T */ 2717 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2718 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2719 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2720 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2721 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2723 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2724 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2725 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2726 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2727 2728 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2729 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2733 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2734 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2735 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2736 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2737 2738 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2739 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2740 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2741 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2742 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2743 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2744 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2745 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2747 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2748 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2749 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2750 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2751 2752 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2753 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2754 /* insert cols for primal dofs */ 2755 for (j=0;j<primal_dofs;j++) { 2756 start_vals = &qr_basis[j*size_of_constraint]; 2757 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2758 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2759 } 2760 /* insert cols for dual dofs */ 2761 for (j=0,k=0;j<dual_dofs;k++) { 2762 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2763 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2764 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2765 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2766 j++; 2767 } 2768 } 2769 2770 /* check change of basis */ 2771 if (pcbddc->dbg_flag) { 2772 PetscInt ii,jj; 2773 PetscBool valid_qr=PETSC_TRUE; 2774 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2775 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2776 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2777 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2778 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2780 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2781 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2782 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2783 for (jj=0;jj<size_of_constraint;jj++) { 2784 for (ii=0;ii<primal_dofs;ii++) { 2785 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2786 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2787 } 2788 } 2789 if (!valid_qr) { 2790 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2791 for (jj=0;jj<size_of_constraint;jj++) { 2792 for (ii=0;ii<primal_dofs;ii++) { 2793 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2794 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2795 } 2796 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2797 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2798 } 2799 } 2800 } 2801 } else { 2802 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2803 } 2804 } 2805 } else { /* simple transformation block */ 2806 PetscInt row,col; 2807 PetscScalar val,norm; 2808 2809 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2810 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2811 for (j=0;j<size_of_constraint;j++) { 2812 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2813 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2814 if (!PetscBTLookup(is_primal,row_B)) { 2815 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2816 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2817 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2818 } else { 2819 for (k=0;k<size_of_constraint;k++) { 2820 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2821 if (row != col) { 2822 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2823 } else { 2824 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2825 } 2826 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2827 } 2828 } 2829 } 2830 if (pcbddc->dbg_flag) { 2831 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2832 } 2833 } 2834 } else { 2835 if (pcbddc->dbg_flag) { 2836 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2837 } 2838 } 2839 } 2840 2841 /* free workspace */ 2842 if (qr_needed) { 2843 if (pcbddc->dbg_flag) { 2844 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2845 } 2846 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2847 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2848 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2849 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2850 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2851 } 2852 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2853 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 2856 /* assembling of global change of variable */ 2857 { 2858 Mat tmat; 2859 PetscInt bs; 2860 2861 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2862 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2863 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2864 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2865 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2866 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2867 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2868 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2869 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2870 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2871 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2872 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2873 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2874 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2875 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2876 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2877 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2878 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2879 } 2880 /* check */ 2881 if (pcbddc->dbg_flag) { 2882 PetscReal error; 2883 Vec x,x_change; 2884 2885 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2886 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2887 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2888 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2889 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2890 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2891 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2892 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2893 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2894 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2895 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2896 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2897 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2898 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2899 ierr = VecDestroy(&x);CHKERRQ(ierr); 2900 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2901 } 2902 2903 /* adapt sub_schurs computed (if any) */ 2904 if (pcbddc->use_deluxe_scaling) { 2905 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2906 if (sub_schurs->S_Ej_all) { 2907 Mat S_new,tmat; 2908 IS is_all_N; 2909 2910 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2911 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2912 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2913 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2914 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2915 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2916 sub_schurs->S_Ej_all = S_new; 2917 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2918 if (sub_schurs->sum_S_Ej_all) { 2919 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2920 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2921 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2922 sub_schurs->sum_S_Ej_all = S_new; 2923 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2924 } 2925 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2926 } 2927 } 2928 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2929 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2930 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2931 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2932 } 2933 2934 /* set up change of basis context */ 2935 if (pcbddc->ChangeOfBasisMatrix) { 2936 PCBDDCChange_ctx change_ctx; 2937 2938 if (!pcbddc->new_global_mat) { 2939 PetscInt global_size,local_size; 2940 2941 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2942 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2943 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2944 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2945 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2946 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2947 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2948 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2949 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2950 } else { 2951 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2952 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2953 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2954 } 2955 if (!pcbddc->user_ChangeOfBasisMatrix) { 2956 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2957 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2958 } else { 2959 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2960 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2961 } 2962 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2963 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2964 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2965 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 } 2967 2968 /* check if a new primal space has been introduced */ 2969 pcbddc->new_primal_space_local = PETSC_TRUE; 2970 if (olocal_primal_size == pcbddc->local_primal_size) { 2971 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2972 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2973 if (!pcbddc->new_primal_space_local) { 2974 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2975 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2976 } 2977 } 2978 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2979 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2980 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2981 2982 /* flush dbg viewer */ 2983 if (pcbddc->dbg_flag) { 2984 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2985 } 2986 2987 /* free workspace */ 2988 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2989 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2990 if (!pcbddc->adaptive_selection) { 2991 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2992 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2993 } else { 2994 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2995 pcbddc->adaptive_constraints_idxs_ptr, 2996 pcbddc->adaptive_constraints_data_ptr, 2997 pcbddc->adaptive_constraints_idxs, 2998 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2999 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3000 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3001 } 3002 PetscFunctionReturn(0); 3003 } 3004 3005 #undef __FUNCT__ 3006 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3007 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3008 { 3009 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3010 PC_IS *pcis = (PC_IS*)pc->data; 3011 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3012 PetscInt ierr,i,vertex_size,N; 3013 PetscViewer viewer=pcbddc->dbg_viewer; 3014 3015 PetscFunctionBegin; 3016 /* Reset previously computed graph */ 3017 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3018 /* Init local Graph struct */ 3019 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3020 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3021 3022 /* Check validity of the csr graph passed in by the user */ 3023 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3024 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3025 } 3026 3027 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3028 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3029 PetscInt *xadj,*adjncy; 3030 PetscInt nvtxs; 3031 PetscBool flg_row=PETSC_FALSE; 3032 3033 if (pcbddc->use_local_adj) { 3034 3035 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3036 if (flg_row) { 3037 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3038 pcbddc->computed_rowadj = PETSC_TRUE; 3039 } 3040 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3041 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3042 IS is_dummy; 3043 ISLocalToGlobalMapping l2gmap_dummy; 3044 PetscInt j,sum; 3045 PetscInt *cxadj,*cadjncy; 3046 const PetscInt *idxs; 3047 PCBDDCGraph graph; 3048 PetscBT is_on_boundary; 3049 3050 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3051 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3052 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3053 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3054 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3055 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3056 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3057 if (flg_row) { 3058 graph->xadj = xadj; 3059 graph->adjncy = adjncy; 3060 } 3061 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3062 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3063 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3064 3065 if (pcbddc->dbg_flag) { 3066 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3067 for (i=0;i<graph->ncc;i++) { 3068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3069 } 3070 } 3071 3072 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3073 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3074 for (i=0;i<pcis->n_B;i++) { 3075 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3076 } 3077 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3078 3079 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3080 sum = 0; 3081 for (i=0;i<graph->ncc;i++) { 3082 PetscInt sizecc = 0; 3083 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3084 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3085 sizecc++; 3086 } 3087 } 3088 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3089 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3090 cxadj[graph->queue[j]] = sizecc; 3091 } 3092 } 3093 sum += sizecc*sizecc; 3094 } 3095 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3096 sum = 0; 3097 for (i=0;i<pcis->n;i++) { 3098 PetscInt temp = cxadj[i]; 3099 cxadj[i] = sum; 3100 sum += temp; 3101 } 3102 cxadj[pcis->n] = sum; 3103 for (i=0;i<graph->ncc;i++) { 3104 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3105 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3106 PetscInt k,sizecc = 0; 3107 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3108 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3109 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3110 sizecc++; 3111 } 3112 } 3113 } 3114 } 3115 } 3116 if (sum) { 3117 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3118 } else { 3119 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3120 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3121 } 3122 graph->xadj = 0; 3123 graph->adjncy = 0; 3124 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3125 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3126 } 3127 } 3128 if (pcbddc->dbg_flag) { 3129 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3130 } 3131 3132 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3133 vertex_size = 1; 3134 if (pcbddc->user_provided_isfordofs) { 3135 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3136 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3137 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3138 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3139 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3140 } 3141 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3142 pcbddc->n_ISForDofs = 0; 3143 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3144 } 3145 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3146 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3147 } else { 3148 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3149 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3150 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3151 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3152 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3153 } 3154 } 3155 } 3156 3157 /* Setup of Graph */ 3158 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3159 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3160 } 3161 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3162 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3163 } 3164 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3165 3166 /* Graph's connected components analysis */ 3167 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3168 3169 /* print some info to stdout */ 3170 if (pcbddc->dbg_flag) { 3171 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3172 } 3173 3174 /* mark topography has done */ 3175 pcbddc->recompute_topography = PETSC_FALSE; 3176 PetscFunctionReturn(0); 3177 } 3178 3179 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3180 #undef __FUNCT__ 3181 #define __FUNCT__ "PCBDDCSubsetNumbering" 3182 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3183 { 3184 PetscSF sf; 3185 PetscLayout map; 3186 const PetscInt *idxs; 3187 PetscInt *leaf_data,*root_data,*gidxs; 3188 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3189 PetscInt n_n,nlocals,start,first_index; 3190 PetscMPIInt commsize; 3191 PetscBool first_found; 3192 PetscErrorCode ierr; 3193 3194 PetscFunctionBegin; 3195 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3196 if (subset_mult) { 3197 PetscCheckSameComm(subset,1,subset_mult,2); 3198 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3199 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3200 } 3201 /* create workspace layout for computing global indices of subset */ 3202 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3203 lbounds[0] = lbounds[1] = 0; 3204 for (i=0;i<n;i++) { 3205 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3206 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3207 } 3208 lbounds[0] = -lbounds[0]; 3209 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3210 gbounds[0] = -gbounds[0]; 3211 N = gbounds[1] - gbounds[0] + 1; 3212 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3213 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3214 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3215 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3216 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3217 3218 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3219 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3220 if (subset_mult) { 3221 const PetscInt* idxs_mult; 3222 3223 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3224 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3225 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3226 } else { 3227 for (i=0;i<n;i++) leaf_data[i] = 1; 3228 } 3229 /* local size of new subset */ 3230 n_n = 0; 3231 for (i=0;i<n;i++) n_n += leaf_data[i]; 3232 3233 /* global indexes in layout */ 3234 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3235 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3236 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3237 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3238 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3239 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3240 3241 /* reduce from leaves to roots */ 3242 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3243 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3244 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3245 3246 /* count indexes in local part of layout */ 3247 nlocals = 0; 3248 first_index = -1; 3249 first_found = PETSC_FALSE; 3250 for (i=0;i<Nl;i++) { 3251 if (!first_found && root_data[i]) { 3252 first_found = PETSC_TRUE; 3253 first_index = i; 3254 } 3255 nlocals += root_data[i]; 3256 } 3257 3258 /* cumulative of number of indexes and size of subset without holes */ 3259 start = 0; 3260 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3261 if (N_n) { /* compute total size of new subset if requested */ 3262 *N_n = start + nlocals; 3263 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3264 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3265 } 3266 /* adapt roof data with cumulative */ 3267 if (first_found) { 3268 PetscInt old_index; 3269 3270 root_data[first_index] += start; 3271 old_index = first_index; 3272 for (i=first_index+1;i<Nl;i++) { 3273 if (root_data[i]) { 3274 root_data[i] += root_data[old_index]; 3275 old_index = i; 3276 } 3277 } 3278 } 3279 3280 /* from roots to leaves */ 3281 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3282 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3283 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3284 3285 /* create new IS with global indexes without holes */ 3286 if (subset_mult) { 3287 const PetscInt* idxs_mult; 3288 PetscInt cum; 3289 3290 cum = 0; 3291 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3292 for (i=0;i<n;i++) { 3293 PetscInt j; 3294 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3295 } 3296 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3297 } else { 3298 for (i=0;i<n;i++) { 3299 gidxs[i] = leaf_data[i]-1; 3300 } 3301 } 3302 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3303 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3304 PetscFunctionReturn(0); 3305 } 3306 3307 #undef __FUNCT__ 3308 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3309 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3310 { 3311 PetscInt i,j; 3312 PetscScalar *alphas; 3313 PetscErrorCode ierr; 3314 3315 PetscFunctionBegin; 3316 /* this implements stabilized Gram-Schmidt */ 3317 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3318 for (i=0;i<n;i++) { 3319 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3320 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3321 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3322 } 3323 ierr = PetscFree(alphas);CHKERRQ(ierr); 3324 PetscFunctionReturn(0); 3325 } 3326 3327 #undef __FUNCT__ 3328 #define __FUNCT__ "MatISGetSubassemblingPattern" 3329 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3330 { 3331 Mat_IS *matis; 3332 IS ranks_send_to; 3333 PetscInt n_neighs,*neighs,*n_shared,**shared; 3334 PetscMPIInt size,rank,color; 3335 PetscInt *xadj,*adjncy; 3336 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3337 PetscInt i,local_size,threshold=0; 3338 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3339 PetscSubcomm subcomm; 3340 PetscErrorCode ierr; 3341 3342 PetscFunctionBegin; 3343 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3344 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3345 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3346 3347 /* Get info on mapping */ 3348 matis = (Mat_IS*)(mat->data); 3349 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3350 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3351 3352 /* build local CSR graph of subdomains' connectivity */ 3353 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3354 xadj[0] = 0; 3355 xadj[1] = PetscMax(n_neighs-1,0); 3356 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3357 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3358 3359 if (threshold) { 3360 PetscInt xadj_count = 0; 3361 for (i=1;i<n_neighs;i++) { 3362 if (n_shared[i] > threshold) { 3363 adjncy[xadj_count] = neighs[i]; 3364 adjncy_wgt[xadj_count] = n_shared[i]; 3365 xadj_count++; 3366 } 3367 } 3368 xadj[1] = xadj_count; 3369 } else { 3370 if (xadj[1]) { 3371 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3372 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3373 } 3374 } 3375 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3376 if (use_square) { 3377 for (i=0;i<xadj[1];i++) { 3378 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3379 } 3380 } 3381 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3382 3383 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3384 3385 /* 3386 Restrict work on active processes only. 3387 */ 3388 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3389 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3390 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3391 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3392 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3393 if (color) { 3394 ierr = PetscFree(xadj);CHKERRQ(ierr); 3395 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3396 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3397 } else { 3398 Mat subdomain_adj; 3399 IS new_ranks,new_ranks_contig; 3400 MatPartitioning partitioner; 3401 PetscInt prank,rstart=0,rend=0; 3402 PetscInt *is_indices,*oldranks; 3403 PetscBool aggregate; 3404 3405 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3406 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3407 prank = rank; 3408 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3409 /* 3410 for (i=0;i<size;i++) { 3411 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3412 } 3413 */ 3414 for (i=0;i<xadj[1];i++) { 3415 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3416 } 3417 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3418 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3419 if (aggregate) { 3420 PetscInt lrows,row,ncols,*cols; 3421 PetscMPIInt nrank; 3422 PetscScalar *vals; 3423 3424 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3425 lrows = 0; 3426 if (nrank<redprocs) { 3427 lrows = size/redprocs; 3428 if (nrank<size%redprocs) lrows++; 3429 } 3430 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,100,NULL,100,NULL,&subdomain_adj);CHKERRQ(ierr); 3431 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3432 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3433 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3434 row = nrank; 3435 ncols = xadj[1]-xadj[0]; 3436 cols = adjncy; 3437 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3438 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3439 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3440 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3441 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3442 ierr = PetscFree(xadj);CHKERRQ(ierr); 3443 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3444 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3445 ierr = PetscFree(vals);CHKERRQ(ierr); 3446 } else { 3447 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3448 } 3449 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3450 3451 /* Partition */ 3452 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3453 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3454 if (use_vwgt) { 3455 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3456 v_wgt[0] = local_size; 3457 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3458 } 3459 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3460 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3461 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3462 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3463 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3464 3465 /* renumber new_ranks to avoid "holes" in new set of processors */ 3466 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3467 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3468 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3469 if (!redprocs) { 3470 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3471 } else { 3472 PetscInt idxs[1]; 3473 PetscMPIInt tag; 3474 MPI_Request *reqs; 3475 3476 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3477 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3478 for (i=rstart;i<rend;i++) { 3479 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3480 } 3481 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3482 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3483 ierr = PetscFree(reqs);CHKERRQ(ierr); 3484 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3485 } 3486 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3487 /* clean up */ 3488 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3489 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3490 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3491 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3492 } 3493 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3494 3495 /* assemble parallel IS for sends */ 3496 i = 1; 3497 if (color) i=0; 3498 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3499 /* get back IS */ 3500 *is_sends = ranks_send_to; 3501 PetscFunctionReturn(0); 3502 } 3503 3504 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3505 3506 #undef __FUNCT__ 3507 #define __FUNCT__ "MatISSubassemble" 3508 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3509 { 3510 Mat local_mat; 3511 Mat_IS *matis; 3512 IS is_sends_internal; 3513 PetscInt rows,cols,new_local_rows; 3514 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3515 PetscBool ismatis,isdense,newisdense,destroy_mat; 3516 ISLocalToGlobalMapping l2gmap; 3517 PetscInt* l2gmap_indices; 3518 const PetscInt* is_indices; 3519 MatType new_local_type; 3520 /* buffers */ 3521 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3522 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3523 PetscInt *recv_buffer_idxs_local; 3524 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3525 /* MPI */ 3526 MPI_Comm comm,comm_n; 3527 PetscSubcomm subcomm; 3528 PetscMPIInt n_sends,n_recvs,commsize; 3529 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3530 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3531 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3532 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3533 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3534 PetscErrorCode ierr; 3535 3536 PetscFunctionBegin; 3537 /* TODO: add missing checks */ 3538 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3539 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3540 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3541 PetscValidLogicalCollectiveInt(mat,nis,7); 3542 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3543 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3544 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3545 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3546 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3547 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3548 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3549 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3550 PetscInt mrows,mcols,mnrows,mncols; 3551 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3552 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3553 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3554 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3555 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3556 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3557 } 3558 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3559 PetscValidLogicalCollectiveInt(mat,bs,0); 3560 /* prepare IS for sending if not provided */ 3561 if (!is_sends) { 3562 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3563 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3564 } else { 3565 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3566 is_sends_internal = is_sends; 3567 } 3568 3569 /* get pointer of MATIS data */ 3570 matis = (Mat_IS*)mat->data; 3571 3572 /* get comm */ 3573 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3574 3575 /* compute number of sends */ 3576 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3577 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3578 3579 /* compute number of receives */ 3580 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3581 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3582 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3583 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3584 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3585 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3586 ierr = PetscFree(iflags);CHKERRQ(ierr); 3587 3588 /* restrict comm if requested */ 3589 subcomm = 0; 3590 destroy_mat = PETSC_FALSE; 3591 if (restrict_comm) { 3592 PetscMPIInt color,subcommsize; 3593 3594 color = 0; 3595 if (restrict_full) { 3596 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3597 } else { 3598 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3599 } 3600 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3601 subcommsize = commsize - subcommsize; 3602 /* check if reuse has been requested */ 3603 if (reuse == MAT_REUSE_MATRIX) { 3604 if (*mat_n) { 3605 PetscMPIInt subcommsize2; 3606 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3607 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3608 comm_n = PetscObjectComm((PetscObject)*mat_n); 3609 } else { 3610 comm_n = PETSC_COMM_SELF; 3611 } 3612 } else { /* MAT_INITIAL_MATRIX */ 3613 PetscMPIInt rank; 3614 3615 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3616 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3617 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3618 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3619 comm_n = PetscSubcommChild(subcomm); 3620 } 3621 /* flag to destroy *mat_n if not significative */ 3622 if (color) destroy_mat = PETSC_TRUE; 3623 } else { 3624 comm_n = comm; 3625 } 3626 3627 /* prepare send/receive buffers */ 3628 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3629 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3630 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3631 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3632 if (nis) { 3633 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3634 } 3635 3636 /* Get data from local matrices */ 3637 if (!isdense) { 3638 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3639 /* TODO: See below some guidelines on how to prepare the local buffers */ 3640 /* 3641 send_buffer_vals should contain the raw values of the local matrix 3642 send_buffer_idxs should contain: 3643 - MatType_PRIVATE type 3644 - PetscInt size_of_l2gmap 3645 - PetscInt global_row_indices[size_of_l2gmap] 3646 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3647 */ 3648 } else { 3649 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3650 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3651 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3652 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3653 send_buffer_idxs[1] = i; 3654 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3655 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3656 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3657 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3658 for (i=0;i<n_sends;i++) { 3659 ilengths_vals[is_indices[i]] = len*len; 3660 ilengths_idxs[is_indices[i]] = len+2; 3661 } 3662 } 3663 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3664 /* additional is (if any) */ 3665 if (nis) { 3666 PetscMPIInt psum; 3667 PetscInt j; 3668 for (j=0,psum=0;j<nis;j++) { 3669 PetscInt plen; 3670 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3671 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3672 psum += len+1; /* indices + lenght */ 3673 } 3674 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3675 for (j=0,psum=0;j<nis;j++) { 3676 PetscInt plen; 3677 const PetscInt *is_array_idxs; 3678 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3679 send_buffer_idxs_is[psum] = plen; 3680 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3681 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3682 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3683 psum += plen+1; /* indices + lenght */ 3684 } 3685 for (i=0;i<n_sends;i++) { 3686 ilengths_idxs_is[is_indices[i]] = psum; 3687 } 3688 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3689 } 3690 3691 buf_size_idxs = 0; 3692 buf_size_vals = 0; 3693 buf_size_idxs_is = 0; 3694 for (i=0;i<n_recvs;i++) { 3695 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3696 buf_size_vals += (PetscInt)olengths_vals[i]; 3697 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3698 } 3699 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3700 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3701 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3702 3703 /* get new tags for clean communications */ 3704 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3705 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3706 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3707 3708 /* allocate for requests */ 3709 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3710 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3711 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3712 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3713 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3714 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3715 3716 /* communications */ 3717 ptr_idxs = recv_buffer_idxs; 3718 ptr_vals = recv_buffer_vals; 3719 ptr_idxs_is = recv_buffer_idxs_is; 3720 for (i=0;i<n_recvs;i++) { 3721 source_dest = onodes[i]; 3722 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3723 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3724 ptr_idxs += olengths_idxs[i]; 3725 ptr_vals += olengths_vals[i]; 3726 if (nis) { 3727 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3728 ptr_idxs_is += olengths_idxs_is[i]; 3729 } 3730 } 3731 for (i=0;i<n_sends;i++) { 3732 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3733 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3734 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3735 if (nis) { 3736 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3737 } 3738 } 3739 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3740 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3741 3742 /* assemble new l2g map */ 3743 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3744 ptr_idxs = recv_buffer_idxs; 3745 new_local_rows = 0; 3746 for (i=0;i<n_recvs;i++) { 3747 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3748 ptr_idxs += olengths_idxs[i]; 3749 } 3750 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3751 ptr_idxs = recv_buffer_idxs; 3752 new_local_rows = 0; 3753 for (i=0;i<n_recvs;i++) { 3754 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3755 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3756 ptr_idxs += olengths_idxs[i]; 3757 } 3758 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3759 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3760 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3761 3762 /* infer new local matrix type from received local matrices type */ 3763 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3764 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3765 if (n_recvs) { 3766 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3767 ptr_idxs = recv_buffer_idxs; 3768 for (i=0;i<n_recvs;i++) { 3769 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3770 new_local_type_private = MATAIJ_PRIVATE; 3771 break; 3772 } 3773 ptr_idxs += olengths_idxs[i]; 3774 } 3775 switch (new_local_type_private) { 3776 case MATDENSE_PRIVATE: 3777 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3778 new_local_type = MATSEQAIJ; 3779 bs = 1; 3780 } else { /* if I receive only 1 dense matrix */ 3781 new_local_type = MATSEQDENSE; 3782 bs = 1; 3783 } 3784 break; 3785 case MATAIJ_PRIVATE: 3786 new_local_type = MATSEQAIJ; 3787 bs = 1; 3788 break; 3789 case MATBAIJ_PRIVATE: 3790 new_local_type = MATSEQBAIJ; 3791 break; 3792 case MATSBAIJ_PRIVATE: 3793 new_local_type = MATSEQSBAIJ; 3794 break; 3795 default: 3796 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3797 break; 3798 } 3799 } else { /* by default, new_local_type is seqdense */ 3800 new_local_type = MATSEQDENSE; 3801 bs = 1; 3802 } 3803 3804 /* create MATIS object if needed */ 3805 if (reuse == MAT_INITIAL_MATRIX) { 3806 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3807 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3808 } else { 3809 /* it also destroys the local matrices */ 3810 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3811 } 3812 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3813 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3814 3815 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3816 3817 /* Global to local map of received indices */ 3818 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3819 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3820 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3821 3822 /* restore attributes -> type of incoming data and its size */ 3823 buf_size_idxs = 0; 3824 for (i=0;i<n_recvs;i++) { 3825 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3826 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3827 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3828 } 3829 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3830 3831 /* set preallocation */ 3832 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3833 if (!newisdense) { 3834 PetscInt *new_local_nnz=0; 3835 3836 ptr_vals = recv_buffer_vals; 3837 ptr_idxs = recv_buffer_idxs_local; 3838 if (n_recvs) { 3839 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3840 } 3841 for (i=0;i<n_recvs;i++) { 3842 PetscInt j; 3843 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3844 for (j=0;j<*(ptr_idxs+1);j++) { 3845 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3846 } 3847 } else { 3848 /* TODO */ 3849 } 3850 ptr_idxs += olengths_idxs[i]; 3851 } 3852 if (new_local_nnz) { 3853 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3854 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3855 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3856 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3857 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3858 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3859 } else { 3860 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3861 } 3862 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3863 } else { 3864 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3865 } 3866 3867 /* set values */ 3868 ptr_vals = recv_buffer_vals; 3869 ptr_idxs = recv_buffer_idxs_local; 3870 for (i=0;i<n_recvs;i++) { 3871 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3872 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3873 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3874 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3875 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3876 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3877 } else { 3878 /* TODO */ 3879 } 3880 ptr_idxs += olengths_idxs[i]; 3881 ptr_vals += olengths_vals[i]; 3882 } 3883 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3884 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3885 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3886 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3887 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3888 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3889 3890 #if 0 3891 if (!restrict_comm) { /* check */ 3892 Vec lvec,rvec; 3893 PetscReal infty_error; 3894 3895 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3896 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3897 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3898 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3899 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3900 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3901 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3902 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3903 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3904 } 3905 #endif 3906 3907 /* assemble new additional is (if any) */ 3908 if (nis) { 3909 PetscInt **temp_idxs,*count_is,j,psum; 3910 3911 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3912 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3913 ptr_idxs = recv_buffer_idxs_is; 3914 psum = 0; 3915 for (i=0;i<n_recvs;i++) { 3916 for (j=0;j<nis;j++) { 3917 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3918 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3919 psum += plen; 3920 ptr_idxs += plen+1; /* shift pointer to received data */ 3921 } 3922 } 3923 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3924 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3925 for (i=1;i<nis;i++) { 3926 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3927 } 3928 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3929 ptr_idxs = recv_buffer_idxs_is; 3930 for (i=0;i<n_recvs;i++) { 3931 for (j=0;j<nis;j++) { 3932 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3933 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3934 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3935 ptr_idxs += plen+1; /* shift pointer to received data */ 3936 } 3937 } 3938 for (i=0;i<nis;i++) { 3939 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3940 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3941 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3942 } 3943 ierr = PetscFree(count_is);CHKERRQ(ierr); 3944 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3945 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3946 } 3947 /* free workspace */ 3948 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3949 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3950 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3951 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3952 if (isdense) { 3953 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3954 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3955 } else { 3956 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3957 } 3958 if (nis) { 3959 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3960 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3961 } 3962 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3963 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3964 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3965 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3966 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3967 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3968 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3969 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3970 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3971 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3972 ierr = PetscFree(onodes);CHKERRQ(ierr); 3973 if (nis) { 3974 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3975 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3976 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3977 } 3978 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3979 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3980 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3981 for (i=0;i<nis;i++) { 3982 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3983 } 3984 *mat_n = NULL; 3985 } 3986 PetscFunctionReturn(0); 3987 } 3988 3989 /* temporary hack into ksp private data structure */ 3990 #include <petsc/private/kspimpl.h> 3991 3992 #undef __FUNCT__ 3993 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3994 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3995 { 3996 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3997 PC_IS *pcis = (PC_IS*)pc->data; 3998 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3999 MatNullSpace CoarseNullSpace=NULL; 4000 ISLocalToGlobalMapping coarse_islg; 4001 IS coarse_is,*isarray; 4002 PetscInt i,im_active=-1,active_procs=-1; 4003 PetscInt nis,nisdofs,nisneu; 4004 PC pc_temp; 4005 PCType coarse_pc_type; 4006 KSPType coarse_ksp_type; 4007 PetscBool multilevel_requested,multilevel_allowed; 4008 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4009 Mat t_coarse_mat_is; 4010 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4011 PetscMPIInt all_procs; 4012 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4013 PetscBool compute_vecs = PETSC_FALSE; 4014 PetscScalar *array; 4015 PetscErrorCode ierr; 4016 4017 PetscFunctionBegin; 4018 /* Assign global numbering to coarse dofs */ 4019 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4020 PetscInt ocoarse_size; 4021 compute_vecs = PETSC_TRUE; 4022 ocoarse_size = pcbddc->coarse_size; 4023 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4024 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4025 /* see if we can avoid some work */ 4026 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4027 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 4028 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4029 coarse_reuse = PETSC_FALSE; 4030 } else { /* we can safely reuse already computed coarse matrix */ 4031 coarse_reuse = PETSC_TRUE; 4032 } 4033 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4034 coarse_reuse = PETSC_FALSE; 4035 } 4036 /* reset any subassembling information */ 4037 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4038 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4039 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4040 coarse_reuse = PETSC_TRUE; 4041 } 4042 4043 /* count "active" (i.e. with positive local size) and "void" processes */ 4044 im_active = !!(pcis->n); 4045 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4046 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4047 void_procs = all_procs-active_procs; 4048 csin_type_simple = PETSC_TRUE; 4049 redist = PETSC_FALSE; 4050 if (pcbddc->current_level && void_procs) { 4051 csin_ml = PETSC_TRUE; 4052 ncoarse_ml = void_procs; 4053 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4054 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4055 csin_ds = PETSC_TRUE; 4056 ncoarse_ds = pcbddc->redistribute_coarse; 4057 redist = PETSC_TRUE; 4058 } else { 4059 csin_ds = PETSC_TRUE; 4060 ncoarse_ds = active_procs; 4061 redist = PETSC_TRUE; 4062 } 4063 } else { 4064 csin_ml = PETSC_FALSE; 4065 ncoarse_ml = all_procs; 4066 if (void_procs) { 4067 csin_ds = PETSC_TRUE; 4068 ncoarse_ds = void_procs; 4069 csin_type_simple = PETSC_FALSE; 4070 } else { 4071 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4072 csin_ds = PETSC_TRUE; 4073 ncoarse_ds = pcbddc->redistribute_coarse; 4074 redist = PETSC_TRUE; 4075 } else { 4076 csin_ds = PETSC_FALSE; 4077 ncoarse_ds = all_procs; 4078 } 4079 } 4080 } 4081 4082 /* 4083 test if we can go multilevel: three conditions must be satisfied: 4084 - we have not exceeded the number of levels requested 4085 - we can actually subassemble the active processes 4086 - we can find a suitable number of MPI processes where we can place the subassembled problem 4087 */ 4088 multilevel_allowed = PETSC_FALSE; 4089 multilevel_requested = PETSC_FALSE; 4090 if (pcbddc->current_level < pcbddc->max_levels) { 4091 multilevel_requested = PETSC_TRUE; 4092 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4093 multilevel_allowed = PETSC_FALSE; 4094 } else { 4095 multilevel_allowed = PETSC_TRUE; 4096 } 4097 } 4098 /* determine number of process partecipating to coarse solver */ 4099 if (multilevel_allowed) { 4100 ncoarse = ncoarse_ml; 4101 csin = csin_ml; 4102 redist = PETSC_FALSE; 4103 } else { 4104 ncoarse = ncoarse_ds; 4105 csin = csin_ds; 4106 } 4107 4108 /* creates temporary l2gmap and IS for coarse indexes */ 4109 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4110 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4111 4112 /* creates temporary MATIS object for coarse matrix */ 4113 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4114 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4115 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4116 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4117 #if 0 4118 { 4119 PetscViewer viewer; 4120 char filename[256]; 4121 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4122 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4123 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4124 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4125 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4126 } 4127 #endif 4128 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4129 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4130 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4131 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4132 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4133 4134 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4135 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4136 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4137 const PetscInt *idxs; 4138 ISLocalToGlobalMapping tmap; 4139 4140 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4141 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4142 /* allocate space for temporary storage */ 4143 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4144 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4145 /* allocate for IS array */ 4146 nisdofs = pcbddc->n_ISForDofsLocal; 4147 nisneu = !!pcbddc->NeumannBoundariesLocal; 4148 nis = nisdofs + nisneu; 4149 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4150 /* dofs splitting */ 4151 for (i=0;i<nisdofs;i++) { 4152 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4153 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4154 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4155 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4156 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4157 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4158 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4159 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4160 } 4161 /* neumann boundaries */ 4162 if (pcbddc->NeumannBoundariesLocal) { 4163 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4164 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4165 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4166 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4167 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4168 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4169 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4170 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4171 } 4172 /* free memory */ 4173 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4174 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4175 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4176 } else { 4177 nis = 0; 4178 nisdofs = 0; 4179 nisneu = 0; 4180 isarray = NULL; 4181 } 4182 /* destroy no longer needed map */ 4183 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4184 4185 /* restrict on coarse candidates (if needed) */ 4186 coarse_mat_is = NULL; 4187 if (csin) { 4188 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4189 if (redist) { 4190 PetscMPIInt rank; 4191 PetscInt spc,n_spc_p1,dest[1],destsize; 4192 4193 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4194 spc = active_procs/ncoarse; 4195 n_spc_p1 = active_procs%ncoarse; 4196 if (im_active) { 4197 destsize = 1; 4198 if (rank > n_spc_p1*(spc+1)-1) { 4199 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4200 } else { 4201 dest[0] = rank/(spc+1); 4202 } 4203 } else { 4204 destsize = 0; 4205 } 4206 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4207 } else if (csin_type_simple) { 4208 PetscMPIInt rank; 4209 PetscInt issize,isidx; 4210 4211 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4212 if (im_active) { 4213 issize = 1; 4214 isidx = (PetscInt)rank; 4215 } else { 4216 issize = 0; 4217 isidx = -1; 4218 } 4219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4220 } else { /* get a suitable subassembling pattern from MATIS code */ 4221 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4222 } 4223 4224 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4225 if (!redist || ncoarse <= void_procs) { 4226 PetscInt ncoarse_cand,tissize,*nisindices; 4227 PetscInt *coarse_candidates; 4228 const PetscInt* tisindices; 4229 4230 /* get coarse candidates' ranks in pc communicator */ 4231 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4232 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4233 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4234 if (!coarse_candidates[i]) { 4235 coarse_candidates[ncoarse_cand++]=i; 4236 } 4237 } 4238 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4239 4240 4241 if (pcbddc->dbg_flag) { 4242 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4243 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4244 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4245 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4246 for (i=0;i<ncoarse_cand;i++) { 4247 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4248 } 4249 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4250 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4251 } 4252 /* shift the pattern on coarse candidates */ 4253 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4254 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4255 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4256 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4257 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4258 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4259 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4260 } 4261 if (pcbddc->dbg_flag) { 4262 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4264 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4265 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4266 } 4267 } 4268 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4269 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4270 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4271 } else { /* this is the last level, so use just receiving processes in subcomm */ 4272 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4273 } 4274 } else { 4275 if (pcbddc->dbg_flag) { 4276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4277 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4278 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4279 } 4280 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4281 coarse_mat_is = t_coarse_mat_is; 4282 } 4283 4284 /* create local to global scatters for coarse problem */ 4285 if (compute_vecs) { 4286 PetscInt lrows; 4287 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4288 if (coarse_mat_is) { 4289 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4290 } else { 4291 lrows = 0; 4292 } 4293 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4294 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4295 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4296 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4297 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4298 } 4299 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4300 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4301 4302 /* set defaults for coarse KSP and PC */ 4303 if (multilevel_allowed) { 4304 coarse_ksp_type = KSPRICHARDSON; 4305 coarse_pc_type = PCBDDC; 4306 } else { 4307 coarse_ksp_type = KSPPREONLY; 4308 coarse_pc_type = PCREDUNDANT; 4309 } 4310 4311 /* print some info if requested */ 4312 if (pcbddc->dbg_flag) { 4313 if (!multilevel_allowed) { 4314 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4315 if (multilevel_requested) { 4316 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4317 } else if (pcbddc->max_levels) { 4318 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4319 } 4320 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4321 } 4322 } 4323 4324 /* create the coarse KSP object only once with defaults */ 4325 if (coarse_mat_is) { 4326 MatReuse coarse_mat_reuse; 4327 PetscViewer dbg_viewer = NULL; 4328 if (pcbddc->dbg_flag) { 4329 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4330 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4331 } 4332 if (!pcbddc->coarse_ksp) { 4333 char prefix[256],str_level[16]; 4334 size_t len; 4335 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4336 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4337 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4338 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4339 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4340 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4341 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4342 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4343 /* prefix */ 4344 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4345 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4346 if (!pcbddc->current_level) { 4347 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4348 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4349 } else { 4350 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4351 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4352 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4353 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4354 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4355 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4356 } 4357 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4358 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4359 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4360 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4361 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4362 /* allow user customization */ 4363 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4364 } 4365 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4366 if (nisdofs) { 4367 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4368 for (i=0;i<nisdofs;i++) { 4369 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4370 } 4371 } 4372 if (nisneu) { 4373 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4374 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4375 } 4376 4377 /* get some info after set from options */ 4378 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4379 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4380 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4381 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4382 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4383 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4384 isbddc = PETSC_FALSE; 4385 } 4386 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4387 if (isredundant) { 4388 KSP inner_ksp; 4389 PC inner_pc; 4390 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4391 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4392 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4393 } 4394 4395 /* assemble coarse matrix */ 4396 if (coarse_reuse) { 4397 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4398 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4399 coarse_mat_reuse = MAT_REUSE_MATRIX; 4400 } else { 4401 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4402 } 4403 if (isbddc || isnn) { 4404 if (pcbddc->coarsening_ratio > 1) { 4405 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4406 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4407 if (pcbddc->dbg_flag) { 4408 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4409 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4410 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4411 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4412 } 4413 } 4414 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4415 } else { 4416 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4417 coarse_mat = coarse_mat_is; 4418 } 4419 } else { 4420 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4421 } 4422 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4423 4424 /* propagate symmetry info of coarse matrix */ 4425 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4426 if (pc->pmat->symmetric_set) { 4427 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4428 } 4429 if (pc->pmat->hermitian_set) { 4430 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4431 } 4432 if (pc->pmat->spd_set) { 4433 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4434 } 4435 /* set operators */ 4436 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4437 if (pcbddc->dbg_flag) { 4438 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4439 } 4440 } else { /* processes non partecipating to coarse solver (if any) */ 4441 coarse_mat = 0; 4442 } 4443 ierr = PetscFree(isarray);CHKERRQ(ierr); 4444 #if 0 4445 { 4446 PetscViewer viewer; 4447 char filename[256]; 4448 sprintf(filename,"coarse_mat.m"); 4449 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4450 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4451 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4452 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4453 } 4454 #endif 4455 4456 /* Compute coarse null space (special handling by BDDC only) */ 4457 if (pcbddc->NullSpace) { 4458 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4459 } 4460 4461 if (pcbddc->coarse_ksp) { 4462 Vec crhs,csol; 4463 PetscBool ispreonly; 4464 4465 if (CoarseNullSpace) { 4466 if (isbddc) { 4467 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4468 } else { 4469 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4470 } 4471 } 4472 /* setup coarse ksp */ 4473 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4474 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4475 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4476 /* hack */ 4477 if (!csol) { 4478 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4479 } 4480 if (!crhs) { 4481 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4482 } 4483 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4484 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4485 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4486 KSP check_ksp; 4487 KSPType check_ksp_type; 4488 PC check_pc; 4489 Vec check_vec,coarse_vec; 4490 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4491 PetscInt its; 4492 PetscBool compute_eigs; 4493 PetscReal *eigs_r,*eigs_c; 4494 PetscInt neigs; 4495 const char *prefix; 4496 4497 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4498 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4499 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4500 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4501 if (ispreonly) { 4502 check_ksp_type = KSPPREONLY; 4503 compute_eigs = PETSC_FALSE; 4504 } else { 4505 check_ksp_type = KSPGMRES; 4506 compute_eigs = PETSC_TRUE; 4507 } 4508 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4509 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4510 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4511 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4512 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4513 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4514 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4515 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4516 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4517 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4518 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4519 /* create random vec */ 4520 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4521 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4522 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4523 if (CoarseNullSpace) { 4524 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4525 } 4526 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4527 /* solve coarse problem */ 4528 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4529 if (CoarseNullSpace) { 4530 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4531 } 4532 /* set eigenvalue estimation if preonly has not been requested */ 4533 if (compute_eigs) { 4534 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4535 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4536 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4537 lambda_max = eigs_r[neigs-1]; 4538 lambda_min = eigs_r[0]; 4539 if (pcbddc->use_coarse_estimates) { 4540 if (lambda_max>lambda_min) { 4541 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4542 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4543 } 4544 } 4545 } 4546 4547 /* check coarse problem residual error */ 4548 if (pcbddc->dbg_flag) { 4549 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4550 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4551 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4552 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4553 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4554 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4555 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4556 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4557 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4558 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4559 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4560 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4561 if (compute_eigs) { 4562 PetscReal lambda_max_s,lambda_min_s; 4563 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4564 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4565 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4566 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4567 for (i=0;i<neigs;i++) { 4568 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4569 } 4570 } 4571 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4572 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4573 } 4574 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4575 if (compute_eigs) { 4576 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4577 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4578 } 4579 } 4580 } 4581 /* print additional info */ 4582 if (pcbddc->dbg_flag) { 4583 /* waits until all processes reaches this point */ 4584 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4585 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4586 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4587 } 4588 4589 /* free memory */ 4590 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4591 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4592 PetscFunctionReturn(0); 4593 } 4594 4595 #undef __FUNCT__ 4596 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4597 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4598 { 4599 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4600 PC_IS* pcis = (PC_IS*)pc->data; 4601 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4602 IS subset,subset_mult,subset_n; 4603 PetscInt local_size,coarse_size=0; 4604 PetscInt *local_primal_indices=NULL; 4605 const PetscInt *t_local_primal_indices; 4606 PetscErrorCode ierr; 4607 4608 PetscFunctionBegin; 4609 /* Compute global number of coarse dofs */ 4610 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4611 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4612 } 4613 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4614 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,subset_n,&subset);CHKERRQ(ierr); 4615 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4616 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4617 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4618 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4619 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4620 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4621 if (local_size != pcbddc->local_primal_size) { 4622 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4623 } 4624 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4625 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4626 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4627 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4628 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4629 4630 /* check numbering */ 4631 if (pcbddc->dbg_flag) { 4632 PetscScalar coarsesum,*array; 4633 PetscInt i; 4634 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4635 4636 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4637 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4638 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4639 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4640 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4641 for (i=0;i<pcbddc->local_primal_size;i++) { 4642 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4643 } 4644 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4645 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4646 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4647 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4648 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4649 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4650 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4651 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4652 for (i=0;i<pcis->n;i++) { 4653 if (array[i] == 1.0) { 4654 set_error = PETSC_TRUE; 4655 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4656 } 4657 } 4658 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4660 for (i=0;i<pcis->n;i++) { 4661 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4662 } 4663 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4664 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4665 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4666 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4667 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4668 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4669 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4670 PetscInt *gidxs; 4671 4672 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4673 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4674 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4676 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4677 for (i=0;i<pcbddc->local_primal_size;i++) { 4678 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4679 } 4680 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4681 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4682 } 4683 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4684 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4685 } 4686 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4687 /* get back data */ 4688 *coarse_size_n = coarse_size; 4689 *local_primal_indices_n = local_primal_indices; 4690 PetscFunctionReturn(0); 4691 } 4692 4693 #undef __FUNCT__ 4694 #define __FUNCT__ "PCBDDCGlobalToLocal" 4695 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4696 { 4697 IS localis_t; 4698 PetscInt i,lsize,*idxs,n; 4699 PetscScalar *vals; 4700 PetscErrorCode ierr; 4701 4702 PetscFunctionBegin; 4703 /* get indices in local ordering exploiting local to global map */ 4704 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4705 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4706 for (i=0;i<lsize;i++) vals[i] = 1.0; 4707 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4708 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4709 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4710 if (idxs) { /* multilevel guard */ 4711 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4712 } 4713 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4714 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4715 ierr = PetscFree(vals);CHKERRQ(ierr); 4716 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4717 /* now compute set in local ordering */ 4718 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4719 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4720 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4721 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4722 for (i=0,lsize=0;i<n;i++) { 4723 if (PetscRealPart(vals[i]) > 0.5) { 4724 lsize++; 4725 } 4726 } 4727 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4728 for (i=0,lsize=0;i<n;i++) { 4729 if (PetscRealPart(vals[i]) > 0.5) { 4730 idxs[lsize++] = i; 4731 } 4732 } 4733 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4734 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4735 *localis = localis_t; 4736 PetscFunctionReturn(0); 4737 } 4738 4739 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4740 #undef __FUNCT__ 4741 #define __FUNCT__ "PCBDDCMatMult_Private" 4742 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4743 { 4744 PCBDDCChange_ctx change_ctx; 4745 PetscErrorCode ierr; 4746 4747 PetscFunctionBegin; 4748 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4749 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4750 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4751 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4752 PetscFunctionReturn(0); 4753 } 4754 4755 #undef __FUNCT__ 4756 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4757 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4758 { 4759 PCBDDCChange_ctx change_ctx; 4760 PetscErrorCode ierr; 4761 4762 PetscFunctionBegin; 4763 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4764 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4765 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4766 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4767 PetscFunctionReturn(0); 4768 } 4769 4770 #undef __FUNCT__ 4771 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4772 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4773 { 4774 PC_IS *pcis=(PC_IS*)pc->data; 4775 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4776 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4777 Mat S_j; 4778 PetscInt *used_xadj,*used_adjncy; 4779 PetscBool free_used_adj; 4780 PetscErrorCode ierr; 4781 4782 PetscFunctionBegin; 4783 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4784 free_used_adj = PETSC_FALSE; 4785 if (pcbddc->sub_schurs_layers == -1) { 4786 used_xadj = NULL; 4787 used_adjncy = NULL; 4788 } else { 4789 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4790 used_xadj = pcbddc->mat_graph->xadj; 4791 used_adjncy = pcbddc->mat_graph->adjncy; 4792 } else if (pcbddc->computed_rowadj) { 4793 used_xadj = pcbddc->mat_graph->xadj; 4794 used_adjncy = pcbddc->mat_graph->adjncy; 4795 } else { 4796 PetscBool flg_row=PETSC_FALSE; 4797 const PetscInt *xadj,*adjncy; 4798 PetscInt nvtxs; 4799 4800 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4801 if (flg_row) { 4802 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4803 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4804 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4805 free_used_adj = PETSC_TRUE; 4806 } else { 4807 pcbddc->sub_schurs_layers = -1; 4808 used_xadj = NULL; 4809 used_adjncy = NULL; 4810 } 4811 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4812 } 4813 } 4814 4815 /* setup sub_schurs data */ 4816 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4817 if (!sub_schurs->use_mumps) { 4818 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4819 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4820 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4821 } else { 4822 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4823 PetscBool isseqaij; 4824 if (!pcbddc->use_vertices && reuse_solvers) { 4825 PetscInt n_vertices; 4826 4827 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4828 reuse_solvers = (PetscBool)!n_vertices; 4829 } 4830 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4831 if (!isseqaij) { 4832 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4833 if (matis->A == pcbddc->local_mat) { 4834 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4835 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4836 } else { 4837 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4838 } 4839 } 4840 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4841 } 4842 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4843 4844 /* free adjacency */ 4845 if (free_used_adj) { 4846 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4847 } 4848 PetscFunctionReturn(0); 4849 } 4850 4851 #undef __FUNCT__ 4852 #define __FUNCT__ "PCBDDCInitSubSchurs" 4853 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4854 { 4855 PC_IS *pcis=(PC_IS*)pc->data; 4856 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4857 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4858 PCBDDCGraph graph; 4859 PetscErrorCode ierr; 4860 4861 PetscFunctionBegin; 4862 /* attach interface graph for determining subsets */ 4863 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4864 IS verticesIS,verticescomm; 4865 PetscInt vsize,*idxs; 4866 4867 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4868 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4869 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4870 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4871 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4872 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4873 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4874 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4875 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4876 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4877 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4878 /* 4879 if (pcbddc->dbg_flag) { 4880 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4881 } 4882 */ 4883 } else { 4884 graph = pcbddc->mat_graph; 4885 } 4886 4887 /* sub_schurs init */ 4888 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4889 4890 /* free graph struct */ 4891 if (pcbddc->sub_schurs_rebuild) { 4892 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4893 } 4894 PetscFunctionReturn(0); 4895 } 4896