1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 #if defined(PETSC_USE_COMPLEX) 92 rwork = NULL; 93 #endif 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 155 const PetscInt *idxs; 156 PetscReal infty = PETSC_MAX_REAL; 157 PetscInt j,subset_size,eigs_start = 0; 158 PetscBLASInt B_N; 159 PetscBool same_data = PETSC_FALSE; 160 161 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 162 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 163 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 164 if (sub_schurs->is_hermitian) { 165 PetscInt j,k; 166 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 167 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 168 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 for (k=j;k<subset_size;k++) { 172 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 173 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 174 } 175 } 176 } else { 177 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 178 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 179 } 180 } else { 181 S = Sarray + cumarray; 182 St = Starray + cumarray; 183 } 184 185 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 186 /* see if we can save some work */ 187 if (sub_schurs->n_subs == 1) { 188 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 189 } 190 191 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 192 B_neigs = 0; 193 } else { 194 /* Threshold: this is an heuristic for edges */ 195 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 196 197 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 198 PetscBLASInt B_itype = 1; 199 PetscBLASInt B_IL, B_IU; 200 PetscReal eps = -1.0; /* dlamch? */ 201 PetscInt nmin_s; 202 203 /* ask for eigenvalues larger than thresh */ 204 if (pcbddc->dbg_flag) { 205 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 206 } 207 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 208 #if defined(PETSC_USE_COMPLEX) 209 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 210 #else 211 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 212 #endif 213 ierr = PetscFPTrapPop();CHKERRQ(ierr); 214 if (B_ierr) { 215 if (B_ierr < 0 ) { 216 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 217 } else if (B_ierr <= B_N) { 218 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 219 } else { 220 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 221 } 222 } 223 224 if (B_neigs > nmax) { 225 if (pcbddc->dbg_flag) { 226 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 227 } 228 eigs_start = B_neigs -nmax; 229 B_neigs = nmax; 230 } 231 232 nmin_s = PetscMin(nmin,B_N); 233 if (B_neigs < nmin_s) { 234 PetscBLASInt B_neigs2; 235 236 B_IU = B_N - B_neigs; 237 B_IL = B_N - nmin_s + 1; 238 if (pcbddc->dbg_flag) { 239 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 240 } 241 if (sub_schurs->is_hermitian) { 242 PetscInt j; 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 for (j=0;j<subset_size;j++) { 247 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 248 } 249 } else { 250 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 251 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 } 253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 254 #if defined(PETSC_USE_COMPLEX) 255 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 256 #else 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 258 #endif 259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 260 B_neigs += B_neigs2; 261 } 262 if (B_ierr) { 263 if (B_ierr < 0 ) { 264 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 265 } else if (B_ierr <= B_N) { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 267 } else { 268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 269 } 270 } 271 if (pcbddc->dbg_flag) { 272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 273 for (j=0;j<B_neigs;j++) { 274 if (eigs[j] == 0.0) { 275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 276 } else { 277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 278 } 279 } 280 } 281 } else { 282 /* TODO */ 283 } 284 } 285 maxneigs = PetscMax(B_neigs,maxneigs); 286 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 287 if (B_neigs) { 288 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 289 290 if (pcbddc->dbg_flag > 1) { 291 PetscInt ii; 292 for (ii=0;ii<B_neigs;ii++) { 293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 294 for (j=0;j<B_N;j++) { 295 #if defined(PETSC_USE_COMPLEX) 296 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 297 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 298 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 299 #else 300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 301 #endif 302 } 303 } 304 } 305 #if 0 306 for (j=0;j<B_neigs;j++) { 307 PetscBLASInt Blas_N,Blas_one = 1.0; 308 PetscScalar norm; 309 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 310 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 311 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 312 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 313 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 314 } else { 315 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 316 } 317 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 318 } 319 #endif 320 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 321 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 322 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 323 cum++; 324 } 325 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 326 /* shift for next computation */ 327 cumarray += subset_size*subset_size; 328 } 329 if (pcbddc->dbg_flag) { 330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 331 } 332 333 if (mss) { 334 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 335 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 336 /* destroy matrices (junk) */ 337 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 338 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 339 } 340 if (allocated_S_St) { 341 ierr = PetscFree2(S,St);CHKERRQ(ierr); 342 } 343 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 344 #if defined(PETSC_USE_COMPLEX) 345 ierr = PetscFree(rwork);CHKERRQ(ierr); 346 #endif 347 if (pcbddc->dbg_flag) { 348 PetscInt maxneigs_r; 349 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 350 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 351 } 352 PetscFunctionReturn(0); 353 } 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCBDDCSetUpSolvers" 357 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 358 { 359 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 360 PetscScalar *coarse_submat_vals; 361 PetscErrorCode ierr; 362 363 PetscFunctionBegin; 364 /* Setup local scatters R_to_B and (optionally) R_to_D */ 365 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 366 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 367 368 /* Setup local neumann solver ksp_R */ 369 /* PCBDDCSetUpLocalScatters should be called first! */ 370 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 371 372 /* Change global null space passed in by the user if change of basis has been requested */ 373 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 374 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 375 } 376 377 /* 378 Setup local correction and local part of coarse basis. 379 Gives back the dense local part of the coarse matrix in column major ordering 380 */ 381 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 382 383 /* Compute total number of coarse nodes and setup coarse solver */ 384 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 385 386 /* free */ 387 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "PCBDDCResetCustomization" 393 PetscErrorCode PCBDDCResetCustomization(PC pc) 394 { 395 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 401 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 402 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 403 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 405 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 406 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 407 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 408 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 409 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetTopography" 415 PetscErrorCode PCBDDCResetTopography(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 422 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 423 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 424 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 425 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCResetSolvers" 431 PetscErrorCode PCBDDCResetSolvers(PC pc) 432 { 433 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *array; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 439 if (pcbddc->coarse_phi_B) { 440 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 441 ierr = PetscFree(array);CHKERRQ(ierr); 442 } 443 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 445 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 447 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 449 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 450 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 451 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 452 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 454 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 455 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 456 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 457 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 458 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 459 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 461 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 462 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 463 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 464 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 465 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 471 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 472 { 473 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 474 PC_IS *pcis = (PC_IS*)pc->data; 475 VecType impVecType; 476 PetscInt n_constraints,n_R,old_size; 477 PetscErrorCode ierr; 478 479 PetscFunctionBegin; 480 if (!pcbddc->ConstraintMatrix) { 481 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 482 } 483 /* get sizes */ 484 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 485 n_R = pcis->n-pcbddc->n_vertices; 486 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 487 /* local work vectors (try to avoid unneeded work)*/ 488 /* R nodes */ 489 old_size = -1; 490 if (pcbddc->vec1_R) { 491 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 492 } 493 if (n_R != old_size) { 494 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 495 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 496 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 497 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 498 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 499 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 500 } 501 /* local primal dofs */ 502 old_size = -1; 503 if (pcbddc->vec1_P) { 504 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 505 } 506 if (pcbddc->local_primal_size != old_size) { 507 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 508 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 509 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 510 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 511 } 512 /* local explicit constraints */ 513 old_size = -1; 514 if (pcbddc->vec1_C) { 515 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 516 } 517 if (n_constraints && n_constraints != old_size) { 518 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 519 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 520 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 521 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 522 } 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "PCBDDCSetUpCorrection" 528 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 529 { 530 PetscErrorCode ierr; 531 /* pointers to pcis and pcbddc */ 532 PC_IS* pcis = (PC_IS*)pc->data; 533 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 534 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 535 /* submatrices of local problem */ 536 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 537 /* submatrices of local coarse problem */ 538 Mat S_VV,S_CV,S_VC,S_CC; 539 /* working matrices */ 540 Mat C_CR; 541 /* additional working stuff */ 542 PC pc_R; 543 Mat F; 544 PetscBool isLU,isCHOL,isILU; 545 546 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 547 PetscScalar *work; 548 PetscInt *idx_V_B; 549 PetscInt n,n_vertices,n_constraints; 550 PetscInt i,n_R,n_D,n_B; 551 PetscBool unsymmetric_check; 552 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 553 MatType impMatType; 554 /* some shortcuts to scalars */ 555 PetscScalar one=1.0,m_one=-1.0; 556 557 PetscFunctionBegin; 558 n_vertices = pcbddc->n_vertices; 559 n_constraints = pcbddc->local_primal_size-n_vertices; 560 /* Set Non-overlapping dimensions */ 561 n_B = pcis->n_B; 562 n_D = pcis->n - n_B; 563 n_R = pcis->n - n_vertices; 564 565 /* Set types for local objects needed by BDDC precondtioner */ 566 impMatType = MATSEQDENSE; 567 568 /* vertices in boundary numbering */ 569 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 570 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 571 if (i != n_vertices) { 572 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 573 } 574 575 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 576 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 577 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 578 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 580 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 581 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 582 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 583 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 584 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 585 586 unsymmetric_check = PETSC_FALSE; 587 /* allocate workspace */ 588 n = 0; 589 if (n_constraints) { 590 n += n_R*n_constraints; 591 } 592 if (n_vertices) { 593 n = PetscMax(2*n_R*n_vertices,n); 594 n = PetscMax((n_R+n_B)*n_vertices,n); 595 } 596 if (!pcbddc->symmetric_primal) { 597 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 598 unsymmetric_check = PETSC_TRUE; 599 } 600 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 601 602 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 603 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 604 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 605 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 606 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 607 if (isLU || isILU || isCHOL) { 608 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 609 } else if (sub_schurs->reuse_mumps) { 610 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 611 MatFactorType type; 612 613 F = reuse_mumps->F; 614 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 615 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 616 } else { 617 F = NULL; 618 } 619 620 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 621 if (n_constraints) { 622 Mat M1,M2,M3; 623 Mat auxmat; 624 IS is_aux; 625 PetscScalar *array,*array2; 626 627 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 628 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 629 630 /* Extract constraints on R nodes: C_{CR} */ 631 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 632 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 633 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 634 635 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 636 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 637 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 638 for (i=0;i<n_constraints;i++) { 639 const PetscScalar *row_cmat_values; 640 const PetscInt *row_cmat_indices; 641 PetscInt size_of_constraint,j; 642 643 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 644 for (j=0;j<size_of_constraint;j++) { 645 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 646 } 647 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 648 } 649 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 650 if (F) { 651 Mat B; 652 653 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 654 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 655 ierr = MatDestroy(&B);CHKERRQ(ierr); 656 } else { 657 PetscScalar *marr; 658 659 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 660 for (i=0;i<n_constraints;i++) { 661 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 662 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 663 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 664 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 665 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 666 } 667 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 668 } 669 if (!pcbddc->switch_static) { 670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 671 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 672 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 673 for (i=0;i<n_constraints;i++) { 674 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 675 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 676 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 677 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 678 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 679 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 680 } 681 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 682 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 683 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 684 } else { 685 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 686 pcbddc->local_auxmat2 = local_auxmat2_R; 687 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 688 } 689 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 690 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 691 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 692 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 693 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 694 if (isCHOL) { 695 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 696 } else { 697 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 698 } 699 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 700 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 701 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 702 ierr = MatDestroy(&M2);CHKERRQ(ierr); 703 ierr = MatDestroy(&M3);CHKERRQ(ierr); 704 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 705 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 706 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 707 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 708 ierr = MatDestroy(&M1);CHKERRQ(ierr); 709 } 710 /* Get submatrices from subdomain matrix */ 711 if (n_vertices) { 712 IS is_aux; 713 714 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 715 IS tis; 716 717 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 718 ierr = ISSort(tis);CHKERRQ(ierr); 719 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 720 ierr = ISDestroy(&tis);CHKERRQ(ierr); 721 } else { 722 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 723 } 724 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 725 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 726 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 727 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 728 } 729 730 /* Matrix of coarse basis functions (local) */ 731 if (pcbddc->coarse_phi_B) { 732 PetscInt on_B,on_primal,on_D=n_D; 733 if (pcbddc->coarse_phi_D) { 734 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 735 } 736 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 737 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 738 PetscScalar *marray; 739 740 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 741 ierr = PetscFree(marray);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 743 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 744 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 745 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 746 } 747 } 748 749 if (!pcbddc->coarse_phi_B) { 750 PetscScalar *marray; 751 752 n = n_B*pcbddc->local_primal_size; 753 if (pcbddc->switch_static || pcbddc->dbg_flag) { 754 n += n_D*pcbddc->local_primal_size; 755 } 756 if (!pcbddc->symmetric_primal) { 757 n *= 2; 758 } 759 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 760 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 761 n = n_B*pcbddc->local_primal_size; 762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 764 n += n_D*pcbddc->local_primal_size; 765 } 766 if (!pcbddc->symmetric_primal) { 767 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 768 if (pcbddc->switch_static || pcbddc->dbg_flag) { 769 n = n_B*pcbddc->local_primal_size; 770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 771 } 772 } else { 773 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 774 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 775 if (pcbddc->switch_static || pcbddc->dbg_flag) { 776 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 777 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 778 } 779 } 780 } 781 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 782 /* vertices */ 783 if (n_vertices) { 784 785 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 786 787 if (n_R) { 788 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 789 PetscBLASInt B_N,B_one = 1; 790 PetscScalar *x,*y; 791 PetscBool isseqaij; 792 793 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 794 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 796 if (F) { /* TODO could be optimized for symmetric problems */ 797 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 798 } else { 799 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 800 for (i=0;i<n_vertices;i++) { 801 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 802 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 803 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 804 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 805 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 806 } 807 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 808 } 809 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 810 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 811 if (n_constraints) { 812 Mat B; 813 814 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 815 for (i=0;i<n_vertices;i++) { 816 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 817 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 818 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 819 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 820 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 821 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 822 } 823 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 824 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 825 ierr = MatDestroy(&B);CHKERRQ(ierr); 826 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 827 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 828 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 829 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 830 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 831 ierr = MatDestroy(&B);CHKERRQ(ierr); 832 } 833 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 834 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 835 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 836 } 837 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 838 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 839 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 840 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 841 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 842 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 843 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 844 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 845 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 846 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 847 } else { 848 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 849 } 850 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 851 /* coarse basis functions */ 852 for (i=0;i<n_vertices;i++) { 853 PetscScalar *y; 854 855 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 856 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 857 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 858 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 y[n_B*i+idx_V_B[i]] = 1.0; 861 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 862 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 863 864 if (pcbddc->switch_static || pcbddc->dbg_flag) { 865 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 866 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 867 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 870 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 871 } 872 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 873 } 874 /* if n_R == 0 the object is not destroyed */ 875 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 876 } 877 878 if (n_constraints) { 879 Mat B; 880 881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 882 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 883 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 884 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 885 if (n_vertices) { 886 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 887 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 888 } else { 889 Mat S_VCt; 890 891 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 892 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 893 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 894 } 895 } 896 ierr = MatDestroy(&B);CHKERRQ(ierr); 897 /* coarse basis functions */ 898 for (i=0;i<n_constraints;i++) { 899 PetscScalar *y; 900 901 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 902 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 903 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 904 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 907 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 908 if (pcbddc->switch_static || pcbddc->dbg_flag) { 909 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 910 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 911 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 912 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 914 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 915 } 916 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 917 } 918 } 919 if (n_constraints) { 920 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 921 } 922 923 /* compute other basis functions for non-symmetric problems */ 924 if (!pcbddc->symmetric_primal) { 925 926 if (n_constraints) { 927 Mat S_CCT,B_C; 928 929 /* this is a lazy thing */ 930 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 931 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 932 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 933 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 934 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 935 if (n_vertices) { 936 Mat B_V,S_VCT; 937 938 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 939 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 940 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 941 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 942 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 943 } 944 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 945 } else { /* if there are no constraints, reset work */ 946 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 947 } 948 if (n_vertices && n_R) { 949 Mat A_VRT; 950 PetscScalar *marray; 951 PetscBLASInt B_N,B_one = 1; 952 953 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 954 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 955 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 956 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 957 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 958 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 959 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 960 } 961 962 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 963 for (i=0;i<pcbddc->local_primal_size;i++) { 964 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 965 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 966 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 967 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 968 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 969 } 970 } else { 971 for (i=0;i<pcbddc->local_primal_size;i++) { 972 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 973 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 974 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 975 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 976 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 977 } 978 } 979 /* coarse basis functions */ 980 for (i=0;i<pcbddc->local_primal_size;i++) { 981 PetscScalar *y; 982 983 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 984 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 985 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 986 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 988 if (i<n_vertices) { 989 y[n_B*i+idx_V_B[i]] = 1.0; 990 } 991 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 992 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 993 994 if (pcbddc->switch_static || pcbddc->dbg_flag) { 995 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 996 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 997 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1000 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1001 } 1002 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1003 } 1004 } 1005 /* free memory */ 1006 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1007 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1008 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1009 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1011 ierr = PetscFree(work);CHKERRQ(ierr); 1012 if (n_vertices) { 1013 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1014 } 1015 if (n_constraints) { 1016 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1017 } 1018 /* Checking coarse_sub_mat and coarse basis functios */ 1019 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1020 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1021 if (pcbddc->dbg_flag) { 1022 Mat coarse_sub_mat; 1023 Mat AUXMAT,TM1,TM2,TM3,TM4; 1024 Mat coarse_phi_D,coarse_phi_B; 1025 Mat coarse_psi_D,coarse_psi_B; 1026 Mat A_II,A_BB,A_IB,A_BI; 1027 Mat C_B,CPHI; 1028 IS is_dummy; 1029 Vec mones; 1030 MatType checkmattype=MATSEQAIJ; 1031 PetscReal real_value; 1032 1033 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1034 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1035 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1036 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1037 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1038 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1039 if (unsymmetric_check) { 1040 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1041 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1042 } 1043 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1044 1045 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1046 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1047 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1048 if (unsymmetric_check) { 1049 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1050 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1051 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 } else { 1062 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1063 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1064 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1066 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1067 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1070 } 1071 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1072 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1073 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1074 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1077 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1079 1080 /* check constraints */ 1081 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1082 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1083 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1084 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1085 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1086 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1087 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1089 if (unsymmetric_check) { 1090 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1091 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1092 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1093 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1095 } 1096 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1097 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1098 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1099 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1100 1101 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1102 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1103 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1104 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1105 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1106 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1107 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1108 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1109 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1110 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1111 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1112 if (unsymmetric_check) { 1113 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1114 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1115 } 1116 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1117 } 1118 /* get back data */ 1119 *coarse_submat_vals_n = coarse_submat_vals; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1125 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1126 { 1127 Mat *work_mat; 1128 IS isrow_s,iscol_s; 1129 PetscBool rsorted,csorted; 1130 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1131 PetscErrorCode ierr; 1132 1133 PetscFunctionBegin; 1134 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1135 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1136 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1137 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1138 1139 if (!rsorted) { 1140 const PetscInt *idxs; 1141 PetscInt *idxs_sorted,i; 1142 1143 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1144 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1145 for (i=0;i<rsize;i++) { 1146 idxs_perm_r[i] = i; 1147 } 1148 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1149 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1150 for (i=0;i<rsize;i++) { 1151 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1152 } 1153 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1154 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1155 } else { 1156 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1157 isrow_s = isrow; 1158 } 1159 1160 if (!csorted) { 1161 if (isrow == iscol) { 1162 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1163 iscol_s = isrow_s; 1164 } else { 1165 const PetscInt *idxs; 1166 PetscInt *idxs_sorted,i; 1167 1168 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1169 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1170 for (i=0;i<csize;i++) { 1171 idxs_perm_c[i] = i; 1172 } 1173 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1174 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1175 for (i=0;i<csize;i++) { 1176 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1177 } 1178 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1179 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1180 } 1181 } else { 1182 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1183 iscol_s = iscol; 1184 } 1185 1186 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1187 1188 if (!rsorted || !csorted) { 1189 Mat new_mat; 1190 IS is_perm_r,is_perm_c; 1191 1192 if (!rsorted) { 1193 PetscInt *idxs_r,i; 1194 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1195 for (i=0;i<rsize;i++) { 1196 idxs_r[idxs_perm_r[i]] = i; 1197 } 1198 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1199 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1200 } else { 1201 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1202 } 1203 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1204 1205 if (!csorted) { 1206 if (isrow_s == iscol_s) { 1207 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1208 is_perm_c = is_perm_r; 1209 } else { 1210 PetscInt *idxs_c,i; 1211 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1212 for (i=0;i<csize;i++) { 1213 idxs_c[idxs_perm_c[i]] = i; 1214 } 1215 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1216 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1217 } 1218 } else { 1219 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1220 } 1221 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1222 1223 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1224 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1225 work_mat[0] = new_mat; 1226 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1227 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1228 } 1229 1230 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1231 *B = work_mat[0]; 1232 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1233 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1234 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1240 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1241 { 1242 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1243 PC_IS* pcis = (PC_IS*)pc->data; 1244 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1245 Mat new_mat; 1246 IS is_local,is_global; 1247 PetscInt local_size; 1248 PetscBool isseqaij; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1253 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1254 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1255 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1256 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1257 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1258 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1259 1260 /* check */ 1261 if (pcbddc->dbg_flag) { 1262 Vec x,x_change; 1263 PetscReal error; 1264 1265 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1266 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1267 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1268 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1271 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1272 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1273 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1274 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1275 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1277 ierr = VecDestroy(&x);CHKERRQ(ierr); 1278 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1279 } 1280 1281 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1282 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1283 if (isseqaij) { 1284 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1285 } else { 1286 Mat work_mat; 1287 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1288 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1289 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1290 } 1291 if (matis->A->symmetric_set) { 1292 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1293 #if !defined(PETSC_USE_COMPLEX) 1294 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1295 #endif 1296 } 1297 /* 1298 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1299 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1300 */ 1301 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1307 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1308 { 1309 PC_IS* pcis = (PC_IS*)(pc->data); 1310 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1311 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1312 PetscInt *idx_R_local=NULL; 1313 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1314 PetscInt vbs,bs; 1315 PetscBT bitmask=NULL; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 /* 1320 No need to setup local scatters if 1321 - primal space is unchanged 1322 AND 1323 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1324 AND 1325 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1326 */ 1327 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1328 PetscFunctionReturn(0); 1329 } 1330 /* destroy old objects */ 1331 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1332 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1333 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1334 /* Set Non-overlapping dimensions */ 1335 n_B = pcis->n_B; 1336 n_D = pcis->n - n_B; 1337 n_vertices = pcbddc->n_vertices; 1338 1339 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1340 1341 /* create auxiliary bitmask and allocate workspace */ 1342 if (!sub_schurs->reuse_mumps) { 1343 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1344 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1345 for (i=0;i<n_vertices;i++) { 1346 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1347 } 1348 1349 for (i=0, n_R=0; i<pcis->n; i++) { 1350 if (!PetscBTLookup(bitmask,i)) { 1351 idx_R_local[n_R++] = i; 1352 } 1353 } 1354 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1355 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1356 1357 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1358 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1359 } 1360 1361 /* Block code */ 1362 vbs = 1; 1363 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1364 if (bs>1 && !(n_vertices%bs)) { 1365 PetscBool is_blocked = PETSC_TRUE; 1366 PetscInt *vary; 1367 if (!sub_schurs->reuse_mumps) { 1368 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1369 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1370 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1371 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1372 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1373 for (i=0; i<pcis->n/bs; i++) { 1374 if (vary[i]!=0 && vary[i]!=bs) { 1375 is_blocked = PETSC_FALSE; 1376 break; 1377 } 1378 } 1379 ierr = PetscFree(vary);CHKERRQ(ierr); 1380 } else { 1381 /* Verify directly the R set */ 1382 for (i=0; i<n_R/bs; i++) { 1383 PetscInt j,node=idx_R_local[bs*i]; 1384 for (j=1; j<bs; j++) { 1385 if (node != idx_R_local[bs*i+j]-j) { 1386 is_blocked = PETSC_FALSE; 1387 break; 1388 } 1389 } 1390 } 1391 } 1392 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1393 vbs = bs; 1394 for (i=0;i<n_R/vbs;i++) { 1395 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1396 } 1397 } 1398 } 1399 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1400 if (sub_schurs->reuse_mumps) { 1401 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1402 1403 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1404 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1405 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1406 reuse_mumps->is_R = pcbddc->is_R_local; 1407 } else { 1408 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1409 } 1410 1411 /* print some info if requested */ 1412 if (pcbddc->dbg_flag) { 1413 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1414 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1419 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1420 } 1421 1422 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1423 if (!sub_schurs->reuse_mumps) { 1424 IS is_aux1,is_aux2; 1425 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1426 1427 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1428 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1429 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1430 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1431 for (i=0; i<n_D; i++) { 1432 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1433 } 1434 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1435 for (i=0, j=0; i<n_R; i++) { 1436 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1437 aux_array1[j++] = i; 1438 } 1439 } 1440 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1441 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1442 for (i=0, j=0; i<n_B; i++) { 1443 if (!PetscBTLookup(bitmask,is_indices[i])) { 1444 aux_array2[j++] = i; 1445 } 1446 } 1447 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1448 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1449 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1450 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1451 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1452 1453 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1454 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1455 for (i=0, j=0; i<n_R; i++) { 1456 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1457 aux_array1[j++] = i; 1458 } 1459 } 1460 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1461 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1462 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1463 } 1464 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1465 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1466 } else { 1467 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1468 IS tis; 1469 PetscInt schur_size; 1470 1471 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1472 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1473 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1474 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1476 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1477 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1478 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1479 } 1480 } 1481 PetscFunctionReturn(0); 1482 } 1483 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1487 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1488 { 1489 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1490 PC_IS *pcis = (PC_IS*)pc->data; 1491 PC pc_temp; 1492 Mat A_RR; 1493 MatReuse reuse; 1494 PetscScalar m_one = -1.0; 1495 PetscReal value; 1496 PetscInt n_D,n_R; 1497 PetscBool use_exact,use_exact_reduced,issbaij; 1498 PetscErrorCode ierr; 1499 /* prefixes stuff */ 1500 char dir_prefix[256],neu_prefix[256],str_level[16]; 1501 size_t len; 1502 1503 PetscFunctionBegin; 1504 1505 /* compute prefixes */ 1506 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1507 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1508 if (!pcbddc->current_level) { 1509 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1510 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1511 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1512 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1513 } else { 1514 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1515 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1516 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1517 len -= 15; /* remove "pc_bddc_coarse_" */ 1518 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1519 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1520 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1521 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1522 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1523 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1524 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1525 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1526 } 1527 1528 /* DIRICHLET PROBLEM */ 1529 if (dirichlet) { 1530 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1531 if (pcbddc->local_mat->symmetric_set) { 1532 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1533 } 1534 /* Matrix for Dirichlet problem is pcis->A_II */ 1535 n_D = pcis->n - pcis->n_B; 1536 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1537 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1538 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1539 /* default */ 1540 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1541 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1542 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1543 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1544 if (issbaij) { 1545 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1546 } else { 1547 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1548 } 1549 /* Allow user's customization */ 1550 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1551 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1552 } 1553 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1554 if (sub_schurs->reuse_mumps) { 1555 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1556 1557 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1558 } 1559 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1560 if (!n_D) { 1561 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1562 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1563 } 1564 /* Set Up KSP for Dirichlet problem of BDDC */ 1565 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1566 /* set ksp_D into pcis data */ 1567 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1568 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1569 pcis->ksp_D = pcbddc->ksp_D; 1570 } 1571 1572 /* NEUMANN PROBLEM */ 1573 A_RR = 0; 1574 if (neumann) { 1575 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1576 PetscInt ibs,mbs; 1577 PetscBool issbaij; 1578 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1579 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1580 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1581 if (pcbddc->ksp_R) { /* already created ksp */ 1582 PetscInt nn_R; 1583 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1584 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1585 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1586 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1587 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1588 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1589 reuse = MAT_INITIAL_MATRIX; 1590 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1591 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1592 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1593 reuse = MAT_INITIAL_MATRIX; 1594 } else { /* safe to reuse the matrix */ 1595 reuse = MAT_REUSE_MATRIX; 1596 } 1597 } 1598 /* last check */ 1599 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1600 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1601 reuse = MAT_INITIAL_MATRIX; 1602 } 1603 } else { /* first time, so we need to create the matrix */ 1604 reuse = MAT_INITIAL_MATRIX; 1605 } 1606 /* extract A_RR */ 1607 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1608 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1609 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1610 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1611 if (matis->A == pcbddc->local_mat) { 1612 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1613 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1614 } else { 1615 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1616 } 1617 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1618 if (matis->A == pcbddc->local_mat) { 1619 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1620 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1621 } else { 1622 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1623 } 1624 } 1625 if (!sub_schurs->reuse_mumps) { 1626 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1627 if (pcbddc->local_mat->symmetric_set) { 1628 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1629 } 1630 } else { 1631 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1632 1633 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1634 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1635 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1636 } 1637 if (!pcbddc->ksp_R) { /* create object if not present */ 1638 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1639 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1640 /* default */ 1641 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1642 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1643 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1644 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1645 if (issbaij) { 1646 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1647 } else { 1648 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1649 } 1650 /* Allow user's customization */ 1651 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1652 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1653 } 1654 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1655 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1656 if (!n_R) { 1657 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1658 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1659 } 1660 /* Reuse MUMPS solver if it is present */ 1661 if (sub_schurs->reuse_mumps) { 1662 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1663 1664 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1665 } 1666 /* Set Up KSP for Neumann problem of BDDC */ 1667 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1668 } 1669 /* free Neumann problem's matrix */ 1670 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1671 1672 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1673 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1674 if (pcbddc->dbg_flag) { 1675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1676 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1677 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1678 } 1679 if (dirichlet) { /* Dirichlet */ 1680 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1681 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1682 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1683 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1684 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1685 /* need to be adapted? */ 1686 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1687 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1688 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1689 /* print info */ 1690 if (pcbddc->dbg_flag) { 1691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1693 } 1694 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1695 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1696 } 1697 } 1698 if (neumann) { /* Neumann */ 1699 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1700 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1701 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1702 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1703 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1704 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1705 /* need to be adapted? */ 1706 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1707 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1708 /* print info */ 1709 if (pcbddc->dbg_flag) { 1710 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1711 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1712 } 1713 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1714 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1715 } 1716 } 1717 } 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1723 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1724 { 1725 PetscErrorCode ierr; 1726 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1727 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1728 1729 PetscFunctionBegin; 1730 if (!sub_schurs->reuse_mumps) { 1731 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1732 } 1733 if (!pcbddc->switch_static) { 1734 if (applytranspose && pcbddc->local_auxmat1) { 1735 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1736 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1737 } 1738 if (!sub_schurs->reuse_mumps) { 1739 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1740 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1741 } else { 1742 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1743 1744 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1745 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1746 } 1747 } else { 1748 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1749 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1750 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1751 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1752 if (applytranspose && pcbddc->local_auxmat1) { 1753 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1754 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1755 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1756 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1757 } 1758 } 1759 if (!sub_schurs->reuse_mumps) { 1760 if (applytranspose) { 1761 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1762 } else { 1763 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1764 } 1765 } else { 1766 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1767 1768 if (applytranspose) { 1769 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1770 } else { 1771 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1772 } 1773 } 1774 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1775 if (!pcbddc->switch_static) { 1776 if (!sub_schurs->reuse_mumps) { 1777 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1778 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 } else { 1780 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1781 1782 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1783 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1784 } 1785 if (!applytranspose && pcbddc->local_auxmat1) { 1786 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1787 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1788 } 1789 } else { 1790 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1791 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1792 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 if (!applytranspose && pcbddc->local_auxmat1) { 1795 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1796 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1797 } 1798 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1799 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1800 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1802 } 1803 PetscFunctionReturn(0); 1804 } 1805 1806 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1809 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1810 { 1811 PetscErrorCode ierr; 1812 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1813 PC_IS* pcis = (PC_IS*) (pc->data); 1814 const PetscScalar zero = 0.0; 1815 1816 PetscFunctionBegin; 1817 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1818 if (applytranspose) { 1819 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1820 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1821 } else { 1822 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1823 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1824 } 1825 /* start communications from local primal nodes to rhs of coarse solver */ 1826 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1827 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1828 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1829 1830 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1831 /* TODO remove null space when doing multilevel */ 1832 if (pcbddc->coarse_ksp) { 1833 Vec rhs,sol; 1834 1835 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1836 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1837 if (applytranspose) { 1838 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1839 } else { 1840 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1841 } 1842 } 1843 1844 /* Local solution on R nodes */ 1845 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1846 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1847 } 1848 1849 /* communications from coarse sol to local primal nodes */ 1850 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1851 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1852 1853 /* Sum contributions from two levels */ 1854 if (applytranspose) { 1855 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1856 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1857 } else { 1858 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1859 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1866 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1867 { 1868 PetscErrorCode ierr; 1869 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1870 PetscScalar *array; 1871 Vec from,to; 1872 1873 PetscFunctionBegin; 1874 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1875 from = pcbddc->coarse_vec; 1876 to = pcbddc->vec1_P; 1877 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1878 Vec tvec; 1879 1880 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1881 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1882 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1883 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1884 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1885 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1886 } 1887 } else { /* from local to global -> put data in coarse right hand side */ 1888 from = pcbddc->vec1_P; 1889 to = pcbddc->coarse_vec; 1890 } 1891 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 #undef __FUNCT__ 1896 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1897 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1898 { 1899 PetscErrorCode ierr; 1900 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1901 PetscScalar *array; 1902 Vec from,to; 1903 1904 PetscFunctionBegin; 1905 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1906 from = pcbddc->coarse_vec; 1907 to = pcbddc->vec1_P; 1908 } else { /* from local to global -> put data in coarse right hand side */ 1909 from = pcbddc->vec1_P; 1910 to = pcbddc->coarse_vec; 1911 } 1912 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1913 if (smode == SCATTER_FORWARD) { 1914 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1915 Vec tvec; 1916 1917 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1918 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1919 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1920 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1921 } 1922 } else { 1923 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1924 ierr = VecResetArray(from);CHKERRQ(ierr); 1925 } 1926 } 1927 PetscFunctionReturn(0); 1928 } 1929 1930 /* uncomment for testing purposes */ 1931 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1934 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1935 { 1936 PetscErrorCode ierr; 1937 PC_IS* pcis = (PC_IS*)(pc->data); 1938 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1939 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1940 /* one and zero */ 1941 PetscScalar one=1.0,zero=0.0; 1942 /* space to store constraints and their local indices */ 1943 PetscScalar *constraints_data; 1944 PetscInt *constraints_idxs,*constraints_idxs_B; 1945 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1946 PetscInt *constraints_n; 1947 /* iterators */ 1948 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1949 /* BLAS integers */ 1950 PetscBLASInt lwork,lierr; 1951 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1952 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1953 /* reuse */ 1954 PetscInt olocal_primal_size,olocal_primal_size_cc; 1955 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1956 /* change of basis */ 1957 PetscBool qr_needed; 1958 PetscBT change_basis,qr_needed_idx; 1959 /* auxiliary stuff */ 1960 PetscInt *nnz,*is_indices; 1961 PetscInt ncc; 1962 /* some quantities */ 1963 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1964 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1965 1966 PetscFunctionBegin; 1967 /* Destroy Mat objects computed previously */ 1968 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1969 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1970 /* save info on constraints from previous setup (if any) */ 1971 olocal_primal_size = pcbddc->local_primal_size; 1972 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1973 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1974 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1975 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1976 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1977 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1978 1979 /* print some info */ 1980 if (pcbddc->dbg_flag) { 1981 IS vertices; 1982 PetscInt nv,nedges,nfaces; 1983 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1984 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1985 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1986 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1987 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1991 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1992 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1993 } 1994 1995 if (!pcbddc->adaptive_selection) { 1996 IS ISForVertices,*ISForFaces,*ISForEdges; 1997 MatNullSpace nearnullsp; 1998 const Vec *nearnullvecs; 1999 Vec *localnearnullsp; 2000 PetscScalar *array; 2001 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2002 PetscBool nnsp_has_cnst; 2003 /* LAPACK working arrays for SVD or POD */ 2004 PetscBool skip_lapack,boolforchange; 2005 PetscScalar *work; 2006 PetscReal *singular_vals; 2007 #if defined(PETSC_USE_COMPLEX) 2008 PetscReal *rwork; 2009 #endif 2010 #if defined(PETSC_MISSING_LAPACK_GESVD) 2011 PetscScalar *temp_basis,*correlation_mat; 2012 #else 2013 PetscBLASInt dummy_int=1; 2014 PetscScalar dummy_scalar=1.; 2015 #endif 2016 2017 /* Get index sets for faces, edges and vertices from graph */ 2018 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2019 /* free unneeded index sets */ 2020 if (!pcbddc->use_vertices) { 2021 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2022 } 2023 if (!pcbddc->use_edges) { 2024 for (i=0;i<n_ISForEdges;i++) { 2025 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2026 } 2027 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2028 n_ISForEdges = 0; 2029 } 2030 if (!pcbddc->use_faces) { 2031 for (i=0;i<n_ISForFaces;i++) { 2032 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2033 } 2034 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2035 n_ISForFaces = 0; 2036 } 2037 2038 #if defined(PETSC_USE_DEBUG) 2039 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2040 Also use_change_of_basis should be consistent among processors */ 2041 if (pcbddc->NullSpace) { 2042 PetscBool tbool[2],gbool[2]; 2043 2044 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2045 pcbddc->use_change_of_basis = PETSC_TRUE; 2046 if (!ISForEdges) { 2047 pcbddc->use_change_on_faces = PETSC_TRUE; 2048 } 2049 } 2050 tbool[0] = pcbddc->use_change_of_basis; 2051 tbool[1] = pcbddc->use_change_on_faces; 2052 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2053 pcbddc->use_change_of_basis = gbool[0]; 2054 pcbddc->use_change_on_faces = gbool[1]; 2055 } 2056 #endif 2057 2058 /* check if near null space is attached to global mat */ 2059 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2060 if (nearnullsp) { 2061 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2062 /* remove any stored info */ 2063 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2064 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2065 /* store information for BDDC solver reuse */ 2066 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2067 pcbddc->onearnullspace = nearnullsp; 2068 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2069 for (i=0;i<nnsp_size;i++) { 2070 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2071 } 2072 } else { /* if near null space is not provided BDDC uses constants by default */ 2073 nnsp_size = 0; 2074 nnsp_has_cnst = PETSC_TRUE; 2075 } 2076 /* get max number of constraints on a single cc */ 2077 max_constraints = nnsp_size; 2078 if (nnsp_has_cnst) max_constraints++; 2079 2080 /* 2081 Evaluate maximum storage size needed by the procedure 2082 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2083 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2084 There can be multiple constraints per connected component 2085 */ 2086 n_vertices = 0; 2087 if (ISForVertices) { 2088 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2089 } 2090 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2091 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2092 2093 total_counts = n_ISForFaces+n_ISForEdges; 2094 total_counts *= max_constraints; 2095 total_counts += n_vertices; 2096 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2097 2098 total_counts = 0; 2099 max_size_of_constraint = 0; 2100 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2101 IS used_is; 2102 if (i<n_ISForEdges) { 2103 used_is = ISForEdges[i]; 2104 } else { 2105 used_is = ISForFaces[i-n_ISForEdges]; 2106 } 2107 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2108 total_counts += j; 2109 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2110 } 2111 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2112 2113 /* get local part of global near null space vectors */ 2114 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2115 for (k=0;k<nnsp_size;k++) { 2116 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2117 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2119 } 2120 2121 /* whether or not to skip lapack calls */ 2122 skip_lapack = PETSC_TRUE; 2123 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2124 2125 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2126 if (!skip_lapack) { 2127 PetscScalar temp_work; 2128 2129 #if defined(PETSC_MISSING_LAPACK_GESVD) 2130 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2131 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2132 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2133 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2134 #if defined(PETSC_USE_COMPLEX) 2135 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2136 #endif 2137 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2138 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2139 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2140 lwork = -1; 2141 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2142 #if !defined(PETSC_USE_COMPLEX) 2143 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2144 #else 2145 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2146 #endif 2147 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2148 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2149 #else /* on missing GESVD */ 2150 /* SVD */ 2151 PetscInt max_n,min_n; 2152 max_n = max_size_of_constraint; 2153 min_n = max_constraints; 2154 if (max_size_of_constraint < max_constraints) { 2155 min_n = max_size_of_constraint; 2156 max_n = max_constraints; 2157 } 2158 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2159 #if defined(PETSC_USE_COMPLEX) 2160 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2161 #endif 2162 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2163 lwork = -1; 2164 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2165 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2166 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2167 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2168 #if !defined(PETSC_USE_COMPLEX) 2169 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2170 #else 2171 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2172 #endif 2173 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2174 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2175 #endif /* on missing GESVD */ 2176 /* Allocate optimal workspace */ 2177 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2178 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2179 } 2180 /* Now we can loop on constraining sets */ 2181 total_counts = 0; 2182 constraints_idxs_ptr[0] = 0; 2183 constraints_data_ptr[0] = 0; 2184 /* vertices */ 2185 if (n_vertices) { 2186 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2187 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2188 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2189 for (i=0;i<n_vertices;i++) { 2190 constraints_n[total_counts] = 1; 2191 constraints_data[total_counts] = 1.0; 2192 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2193 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2194 total_counts++; 2195 } 2196 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2197 PetscBool used_vertex; 2198 for (i=0;i<n_vertices;i++) { 2199 used_vertex = PETSC_FALSE; 2200 k = 0; 2201 while (!used_vertex && k<nnsp_size) { 2202 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2203 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2204 constraints_n[total_counts] = 1; 2205 constraints_idxs[total_counts] = is_indices[i]; 2206 constraints_data[total_counts] = 1.0; 2207 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2208 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2209 total_counts++; 2210 used_vertex = PETSC_TRUE; 2211 } 2212 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2213 k++; 2214 } 2215 } 2216 } 2217 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2218 n_vertices = total_counts; 2219 } 2220 2221 /* edges and faces */ 2222 total_counts_cc = total_counts; 2223 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2224 IS used_is; 2225 PetscBool idxs_copied = PETSC_FALSE; 2226 2227 if (ncc<n_ISForEdges) { 2228 used_is = ISForEdges[ncc]; 2229 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2230 } else { 2231 used_is = ISForFaces[ncc-n_ISForEdges]; 2232 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2233 } 2234 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2235 2236 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2237 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2238 /* change of basis should not be performed on local periodic nodes */ 2239 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2240 if (nnsp_has_cnst) { 2241 PetscScalar quad_value; 2242 2243 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2244 idxs_copied = PETSC_TRUE; 2245 2246 if (!pcbddc->use_nnsp_true) { 2247 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2248 } else { 2249 quad_value = 1.0; 2250 } 2251 for (j=0;j<size_of_constraint;j++) { 2252 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2253 } 2254 temp_constraints++; 2255 total_counts++; 2256 } 2257 for (k=0;k<nnsp_size;k++) { 2258 PetscReal real_value; 2259 PetscScalar *ptr_to_data; 2260 2261 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2262 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2263 for (j=0;j<size_of_constraint;j++) { 2264 ptr_to_data[j] = array[is_indices[j]]; 2265 } 2266 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2267 /* check if array is null on the connected component */ 2268 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2269 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2270 if (real_value > 0.0) { /* keep indices and values */ 2271 temp_constraints++; 2272 total_counts++; 2273 if (!idxs_copied) { 2274 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2275 idxs_copied = PETSC_TRUE; 2276 } 2277 } 2278 } 2279 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2280 valid_constraints = temp_constraints; 2281 if (!pcbddc->use_nnsp_true && temp_constraints) { 2282 if (temp_constraints == 1) { /* just normalize the constraint */ 2283 PetscScalar norm,*ptr_to_data; 2284 2285 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2286 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2287 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2288 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2289 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2290 } else { /* perform SVD */ 2291 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2292 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2293 2294 #if defined(PETSC_MISSING_LAPACK_GESVD) 2295 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2296 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2297 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2298 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2299 from that computed using LAPACKgesvd 2300 -> This is due to a different computation of eigenvectors in LAPACKheev 2301 -> The quality of the POD-computed basis will be the same */ 2302 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2303 /* Store upper triangular part of correlation matrix */ 2304 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2305 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2306 for (j=0;j<temp_constraints;j++) { 2307 for (k=0;k<j+1;k++) { 2308 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2309 } 2310 } 2311 /* compute eigenvalues and eigenvectors of correlation matrix */ 2312 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2313 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2314 #if !defined(PETSC_USE_COMPLEX) 2315 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2316 #else 2317 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2318 #endif 2319 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2320 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2321 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2322 j = 0; 2323 while (j < temp_constraints && singular_vals[j] < tol) j++; 2324 total_counts = total_counts-j; 2325 valid_constraints = temp_constraints-j; 2326 /* scale and copy POD basis into used quadrature memory */ 2327 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2331 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2332 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2333 if (j<temp_constraints) { 2334 PetscInt ii; 2335 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2336 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2337 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2338 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2339 for (k=0;k<temp_constraints-j;k++) { 2340 for (ii=0;ii<size_of_constraint;ii++) { 2341 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2342 } 2343 } 2344 } 2345 #else /* on missing GESVD */ 2346 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2347 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2348 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2349 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2350 #if !defined(PETSC_USE_COMPLEX) 2351 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2352 #else 2353 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2354 #endif 2355 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2356 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2357 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2358 k = temp_constraints; 2359 if (k > size_of_constraint) k = size_of_constraint; 2360 j = 0; 2361 while (j < k && singular_vals[k-j-1] < tol) j++; 2362 valid_constraints = k-j; 2363 total_counts = total_counts-temp_constraints+valid_constraints; 2364 #endif /* on missing GESVD */ 2365 } 2366 } 2367 /* update pointers information */ 2368 if (valid_constraints) { 2369 constraints_n[total_counts_cc] = valid_constraints; 2370 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2371 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2372 /* set change_of_basis flag */ 2373 if (boolforchange) { 2374 PetscBTSet(change_basis,total_counts_cc); 2375 } 2376 total_counts_cc++; 2377 } 2378 } 2379 /* free workspace */ 2380 if (!skip_lapack) { 2381 ierr = PetscFree(work);CHKERRQ(ierr); 2382 #if defined(PETSC_USE_COMPLEX) 2383 ierr = PetscFree(rwork);CHKERRQ(ierr); 2384 #endif 2385 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2386 #if defined(PETSC_MISSING_LAPACK_GESVD) 2387 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2388 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2389 #endif 2390 } 2391 for (k=0;k<nnsp_size;k++) { 2392 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2393 } 2394 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2395 /* free index sets of faces, edges and vertices */ 2396 for (i=0;i<n_ISForFaces;i++) { 2397 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2398 } 2399 if (n_ISForFaces) { 2400 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2401 } 2402 for (i=0;i<n_ISForEdges;i++) { 2403 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2404 } 2405 if (n_ISForEdges) { 2406 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2407 } 2408 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2409 } else { 2410 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2411 2412 total_counts = 0; 2413 n_vertices = 0; 2414 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2415 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2416 } 2417 max_constraints = 0; 2418 total_counts_cc = 0; 2419 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2420 total_counts += pcbddc->adaptive_constraints_n[i]; 2421 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2422 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2423 } 2424 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2425 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2426 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2427 constraints_data = pcbddc->adaptive_constraints_data; 2428 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2429 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2430 total_counts_cc = 0; 2431 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2432 if (pcbddc->adaptive_constraints_n[i]) { 2433 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2434 } 2435 } 2436 #if 0 2437 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2438 for (i=0;i<total_counts_cc;i++) { 2439 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2440 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2441 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2442 printf(" %d",constraints_idxs[j]); 2443 } 2444 printf("\n"); 2445 printf("number of cc: %d\n",constraints_n[i]); 2446 } 2447 for (i=0;i<n_vertices;i++) { 2448 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2449 } 2450 for (i=0;i<sub_schurs->n_subs;i++) { 2451 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2452 } 2453 #endif 2454 2455 max_size_of_constraint = 0; 2456 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2457 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2458 /* Change of basis */ 2459 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2460 if (pcbddc->use_change_of_basis) { 2461 for (i=0;i<sub_schurs->n_subs;i++) { 2462 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2463 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2464 } 2465 } 2466 } 2467 } 2468 pcbddc->local_primal_size = total_counts; 2469 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2470 2471 /* map constraints_idxs in boundary numbering */ 2472 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2473 if (i != constraints_idxs_ptr[total_counts_cc]) { 2474 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2475 } 2476 2477 /* Create constraint matrix */ 2478 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2479 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2480 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2481 2482 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2483 /* determine if a QR strategy is needed for change of basis */ 2484 qr_needed = PETSC_FALSE; 2485 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2486 total_primal_vertices=0; 2487 pcbddc->local_primal_size_cc = 0; 2488 for (i=0;i<total_counts_cc;i++) { 2489 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2490 if (size_of_constraint == 1) { 2491 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2492 pcbddc->local_primal_size_cc += 1; 2493 } else if (PetscBTLookup(change_basis,i)) { 2494 for (k=0;k<constraints_n[i];k++) { 2495 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2496 } 2497 pcbddc->local_primal_size_cc += constraints_n[i]; 2498 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2499 PetscBTSet(qr_needed_idx,i); 2500 qr_needed = PETSC_TRUE; 2501 } 2502 } else { 2503 pcbddc->local_primal_size_cc += 1; 2504 } 2505 } 2506 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2507 pcbddc->n_vertices = total_primal_vertices; 2508 /* permute indices in order to have a sorted set of vertices */ 2509 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2510 2511 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2512 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2513 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2514 2515 /* nonzero structure of constraint matrix */ 2516 /* and get reference dof for local constraints */ 2517 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2518 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2519 2520 j = total_primal_vertices; 2521 total_counts = total_primal_vertices; 2522 cum = total_primal_vertices; 2523 for (i=n_vertices;i<total_counts_cc;i++) { 2524 if (!PetscBTLookup(change_basis,i)) { 2525 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2526 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2527 cum++; 2528 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2529 for (k=0;k<constraints_n[i];k++) { 2530 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2531 nnz[j+k] = size_of_constraint; 2532 } 2533 j += constraints_n[i]; 2534 } 2535 } 2536 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2537 ierr = PetscFree(nnz);CHKERRQ(ierr); 2538 2539 /* set values in constraint matrix */ 2540 for (i=0;i<total_primal_vertices;i++) { 2541 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2542 } 2543 total_counts = total_primal_vertices; 2544 for (i=n_vertices;i<total_counts_cc;i++) { 2545 if (!PetscBTLookup(change_basis,i)) { 2546 PetscInt *cols; 2547 2548 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2549 cols = constraints_idxs+constraints_idxs_ptr[i]; 2550 for (k=0;k<constraints_n[i];k++) { 2551 PetscInt row = total_counts+k; 2552 PetscScalar *vals; 2553 2554 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2555 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2556 } 2557 total_counts += constraints_n[i]; 2558 } 2559 } 2560 /* assembling */ 2561 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2562 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2563 2564 /* 2565 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2566 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2567 */ 2568 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2569 if (pcbddc->use_change_of_basis) { 2570 /* dual and primal dofs on a single cc */ 2571 PetscInt dual_dofs,primal_dofs; 2572 /* working stuff for GEQRF */ 2573 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2574 PetscBLASInt lqr_work; 2575 /* working stuff for UNGQR */ 2576 PetscScalar *gqr_work,lgqr_work_t; 2577 PetscBLASInt lgqr_work; 2578 /* working stuff for TRTRS */ 2579 PetscScalar *trs_rhs; 2580 PetscBLASInt Blas_NRHS; 2581 /* pointers for values insertion into change of basis matrix */ 2582 PetscInt *start_rows,*start_cols; 2583 PetscScalar *start_vals; 2584 /* working stuff for values insertion */ 2585 PetscBT is_primal; 2586 PetscInt *aux_primal_numbering_B; 2587 /* matrix sizes */ 2588 PetscInt global_size,local_size; 2589 /* temporary change of basis */ 2590 Mat localChangeOfBasisMatrix; 2591 /* extra space for debugging */ 2592 PetscScalar *dbg_work; 2593 2594 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2595 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2596 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2597 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2598 /* nonzeros for local mat */ 2599 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2600 for (i=0;i<pcis->n;i++) nnz[i]=1; 2601 for (i=n_vertices;i<total_counts_cc;i++) { 2602 if (PetscBTLookup(change_basis,i)) { 2603 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2604 if (PetscBTLookup(qr_needed_idx,i)) { 2605 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2606 } else { 2607 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2608 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2609 } 2610 } 2611 } 2612 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2613 ierr = PetscFree(nnz);CHKERRQ(ierr); 2614 /* Set initial identity in the matrix */ 2615 for (i=0;i<pcis->n;i++) { 2616 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2617 } 2618 2619 if (pcbddc->dbg_flag) { 2620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2622 } 2623 2624 2625 /* Now we loop on the constraints which need a change of basis */ 2626 /* 2627 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2628 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2629 2630 Basic blocks of change of basis matrix T computed by 2631 2632 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2633 2634 | 1 0 ... 0 s_1/S | 2635 | 0 1 ... 0 s_2/S | 2636 | ... | 2637 | 0 ... 1 s_{n-1}/S | 2638 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2639 2640 with S = \sum_{i=1}^n s_i^2 2641 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2642 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2643 2644 - QR decomposition of constraints otherwise 2645 */ 2646 if (qr_needed) { 2647 /* space to store Q */ 2648 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2649 /* first we issue queries for optimal work */ 2650 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2651 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2652 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2653 lqr_work = -1; 2654 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2655 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2656 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2657 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2658 lgqr_work = -1; 2659 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2660 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2661 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2662 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2663 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2664 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2665 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2666 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2667 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2668 /* array to store scaling factors for reflectors */ 2669 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2670 /* array to store rhs and solution of triangular solver */ 2671 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2672 /* allocating workspace for check */ 2673 if (pcbddc->dbg_flag) { 2674 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2675 } 2676 } 2677 /* array to store whether a node is primal or not */ 2678 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2679 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2680 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2681 if (i != total_primal_vertices) { 2682 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2683 } 2684 for (i=0;i<total_primal_vertices;i++) { 2685 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2686 } 2687 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2688 2689 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2690 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2691 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2692 if (PetscBTLookup(change_basis,total_counts)) { 2693 /* get constraint info */ 2694 primal_dofs = constraints_n[total_counts]; 2695 dual_dofs = size_of_constraint-primal_dofs; 2696 2697 if (pcbddc->dbg_flag) { 2698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2699 } 2700 2701 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2702 2703 /* copy quadrature constraints for change of basis check */ 2704 if (pcbddc->dbg_flag) { 2705 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2706 } 2707 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2708 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2709 2710 /* compute QR decomposition of constraints */ 2711 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2712 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2713 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2714 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2715 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2716 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2717 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2718 2719 /* explictly compute R^-T */ 2720 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2721 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2722 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2726 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2727 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2728 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2729 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2730 2731 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2733 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2734 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2735 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2736 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2737 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2738 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2739 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2740 2741 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2742 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2743 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2744 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2745 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2747 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2748 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2749 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2750 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2751 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2752 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2753 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2754 2755 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2756 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2757 /* insert cols for primal dofs */ 2758 for (j=0;j<primal_dofs;j++) { 2759 start_vals = &qr_basis[j*size_of_constraint]; 2760 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2761 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2762 } 2763 /* insert cols for dual dofs */ 2764 for (j=0,k=0;j<dual_dofs;k++) { 2765 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2766 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2767 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2768 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2769 j++; 2770 } 2771 } 2772 2773 /* check change of basis */ 2774 if (pcbddc->dbg_flag) { 2775 PetscInt ii,jj; 2776 PetscBool valid_qr=PETSC_TRUE; 2777 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2778 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2782 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2783 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2784 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2785 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2786 for (jj=0;jj<size_of_constraint;jj++) { 2787 for (ii=0;ii<primal_dofs;ii++) { 2788 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2789 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2790 } 2791 } 2792 if (!valid_qr) { 2793 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2794 for (jj=0;jj<size_of_constraint;jj++) { 2795 for (ii=0;ii<primal_dofs;ii++) { 2796 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2797 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2798 } 2799 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2800 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2801 } 2802 } 2803 } 2804 } else { 2805 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2806 } 2807 } 2808 } else { /* simple transformation block */ 2809 PetscInt row,col; 2810 PetscScalar val,norm; 2811 2812 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2813 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2814 for (j=0;j<size_of_constraint;j++) { 2815 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2816 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2817 if (!PetscBTLookup(is_primal,row_B)) { 2818 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2819 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2820 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2821 } else { 2822 for (k=0;k<size_of_constraint;k++) { 2823 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2824 if (row != col) { 2825 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2826 } else { 2827 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2828 } 2829 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2830 } 2831 } 2832 } 2833 if (pcbddc->dbg_flag) { 2834 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2835 } 2836 } 2837 } else { 2838 if (pcbddc->dbg_flag) { 2839 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2840 } 2841 } 2842 } 2843 2844 /* free workspace */ 2845 if (qr_needed) { 2846 if (pcbddc->dbg_flag) { 2847 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2848 } 2849 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2850 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2851 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2852 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2853 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2854 } 2855 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2856 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2857 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2858 2859 /* assembling of global change of variable */ 2860 { 2861 Mat tmat; 2862 PetscInt bs; 2863 2864 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2865 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2866 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2867 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2868 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2869 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2870 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2871 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2872 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2873 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2874 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2875 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2876 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2877 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2878 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2879 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2880 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2881 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2882 } 2883 /* check */ 2884 if (pcbddc->dbg_flag) { 2885 PetscReal error; 2886 Vec x,x_change; 2887 2888 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2889 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2890 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2891 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2892 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2893 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2894 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2895 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2896 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2897 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2898 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2899 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2900 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2901 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2902 ierr = VecDestroy(&x);CHKERRQ(ierr); 2903 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2904 } 2905 2906 /* adapt sub_schurs computed (if any) */ 2907 if (pcbddc->use_deluxe_scaling) { 2908 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2909 if (sub_schurs->S_Ej_all) { 2910 Mat S_new,tmat; 2911 IS is_all_N; 2912 2913 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2914 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2915 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2916 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2917 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2918 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2919 sub_schurs->S_Ej_all = S_new; 2920 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2921 if (sub_schurs->sum_S_Ej_all) { 2922 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2923 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2924 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2925 sub_schurs->sum_S_Ej_all = S_new; 2926 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2927 } 2928 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2929 } 2930 } 2931 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2932 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2933 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2934 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2935 } 2936 2937 /* set up change of basis context */ 2938 if (pcbddc->ChangeOfBasisMatrix) { 2939 PCBDDCChange_ctx change_ctx; 2940 2941 if (!pcbddc->new_global_mat) { 2942 PetscInt global_size,local_size; 2943 2944 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2945 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2946 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2947 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2948 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2949 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2950 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2951 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2952 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2953 } else { 2954 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2955 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2956 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2957 } 2958 if (!pcbddc->user_ChangeOfBasisMatrix) { 2959 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2960 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2961 } else { 2962 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2963 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2964 } 2965 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2966 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2967 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2968 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2969 } 2970 2971 /* check if a new primal space has been introduced */ 2972 pcbddc->new_primal_space_local = PETSC_TRUE; 2973 if (olocal_primal_size == pcbddc->local_primal_size) { 2974 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2975 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2976 if (!pcbddc->new_primal_space_local) { 2977 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2978 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2979 } 2980 } 2981 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2982 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2983 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2984 2985 /* flush dbg viewer */ 2986 if (pcbddc->dbg_flag) { 2987 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2988 } 2989 2990 /* free workspace */ 2991 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2992 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2993 if (!pcbddc->adaptive_selection) { 2994 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2995 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2996 } else { 2997 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2998 pcbddc->adaptive_constraints_idxs_ptr, 2999 pcbddc->adaptive_constraints_data_ptr, 3000 pcbddc->adaptive_constraints_idxs, 3001 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3002 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3003 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3004 } 3005 PetscFunctionReturn(0); 3006 } 3007 3008 #undef __FUNCT__ 3009 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3010 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3011 { 3012 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3013 PC_IS *pcis = (PC_IS*)pc->data; 3014 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3015 PetscInt ierr,i,vertex_size,N; 3016 PetscViewer viewer=pcbddc->dbg_viewer; 3017 3018 PetscFunctionBegin; 3019 /* Reset previously computed graph */ 3020 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3021 /* Init local Graph struct */ 3022 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3023 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3024 3025 /* Check validity of the csr graph passed in by the user */ 3026 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3027 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3028 } 3029 3030 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3031 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3032 PetscInt *xadj,*adjncy; 3033 PetscInt nvtxs; 3034 PetscBool flg_row=PETSC_FALSE; 3035 3036 if (pcbddc->use_local_adj) { 3037 3038 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3039 if (flg_row) { 3040 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3041 pcbddc->computed_rowadj = PETSC_TRUE; 3042 } 3043 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3044 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3045 IS is_dummy; 3046 ISLocalToGlobalMapping l2gmap_dummy; 3047 PetscInt j,sum; 3048 PetscInt *cxadj,*cadjncy; 3049 const PetscInt *idxs; 3050 PCBDDCGraph graph; 3051 PetscBT is_on_boundary; 3052 3053 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3054 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3055 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3056 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3057 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3058 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3059 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3060 if (flg_row) { 3061 graph->xadj = xadj; 3062 graph->adjncy = adjncy; 3063 } 3064 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3065 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3066 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3067 3068 if (pcbddc->dbg_flag) { 3069 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3070 for (i=0;i<graph->ncc;i++) { 3071 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3072 } 3073 } 3074 3075 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3076 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3077 for (i=0;i<pcis->n_B;i++) { 3078 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3079 } 3080 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3081 3082 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3083 sum = 0; 3084 for (i=0;i<graph->ncc;i++) { 3085 PetscInt sizecc = 0; 3086 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3087 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3088 sizecc++; 3089 } 3090 } 3091 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3092 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3093 cxadj[graph->queue[j]] = sizecc; 3094 } 3095 } 3096 sum += sizecc*sizecc; 3097 } 3098 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3099 sum = 0; 3100 for (i=0;i<pcis->n;i++) { 3101 PetscInt temp = cxadj[i]; 3102 cxadj[i] = sum; 3103 sum += temp; 3104 } 3105 cxadj[pcis->n] = sum; 3106 for (i=0;i<graph->ncc;i++) { 3107 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3108 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3109 PetscInt k,sizecc = 0; 3110 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3111 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3112 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3113 sizecc++; 3114 } 3115 } 3116 } 3117 } 3118 } 3119 if (sum) { 3120 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3121 } else { 3122 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3123 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3124 } 3125 graph->xadj = 0; 3126 graph->adjncy = 0; 3127 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3128 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3129 } 3130 } 3131 if (pcbddc->dbg_flag) { 3132 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3133 } 3134 3135 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3136 vertex_size = 1; 3137 if (pcbddc->user_provided_isfordofs) { 3138 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3139 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3140 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3141 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3142 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3143 } 3144 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3145 pcbddc->n_ISForDofs = 0; 3146 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3147 } 3148 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3149 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3150 } else { 3151 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3152 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3153 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3154 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3155 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3156 } 3157 } 3158 } 3159 3160 /* Setup of Graph */ 3161 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3162 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3163 } 3164 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3165 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3166 } 3167 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3168 3169 /* Graph's connected components analysis */ 3170 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3171 3172 /* print some info to stdout */ 3173 if (pcbddc->dbg_flag) { 3174 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3175 } 3176 3177 /* mark topography has done */ 3178 pcbddc->recompute_topography = PETSC_FALSE; 3179 PetscFunctionReturn(0); 3180 } 3181 3182 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3183 #undef __FUNCT__ 3184 #define __FUNCT__ "PCBDDCSubsetNumbering" 3185 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3186 { 3187 PetscSF sf; 3188 PetscLayout map; 3189 const PetscInt *idxs; 3190 PetscInt *leaf_data,*root_data,*gidxs; 3191 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3192 PetscInt n_n,nlocals,start,first_index; 3193 PetscMPIInt commsize; 3194 PetscBool first_found; 3195 PetscErrorCode ierr; 3196 3197 PetscFunctionBegin; 3198 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3199 if (subset_mult) { 3200 PetscCheckSameComm(subset,1,subset_mult,2); 3201 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3202 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3203 } 3204 /* create workspace layout for computing global indices of subset */ 3205 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3206 lbounds[0] = lbounds[1] = 0; 3207 for (i=0;i<n;i++) { 3208 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3209 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3210 } 3211 lbounds[0] = -lbounds[0]; 3212 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3213 gbounds[0] = -gbounds[0]; 3214 N = gbounds[1] - gbounds[0] + 1; 3215 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3216 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3217 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3218 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3219 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3220 3221 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3222 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3223 if (subset_mult) { 3224 const PetscInt* idxs_mult; 3225 3226 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3227 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3228 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3229 } else { 3230 for (i=0;i<n;i++) leaf_data[i] = 1; 3231 } 3232 /* local size of new subset */ 3233 n_n = 0; 3234 for (i=0;i<n;i++) n_n += leaf_data[i]; 3235 3236 /* global indexes in layout */ 3237 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3238 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3239 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3240 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3241 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3242 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3243 3244 /* reduce from leaves to roots */ 3245 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3246 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3247 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3248 3249 /* count indexes in local part of layout */ 3250 nlocals = 0; 3251 first_index = -1; 3252 first_found = PETSC_FALSE; 3253 for (i=0;i<Nl;i++) { 3254 if (!first_found && root_data[i]) { 3255 first_found = PETSC_TRUE; 3256 first_index = i; 3257 } 3258 nlocals += root_data[i]; 3259 } 3260 3261 /* cumulative of number of indexes and size of subset without holes */ 3262 #if defined(PETSC_HAVE_MPI_EXSCAN) 3263 start = 0; 3264 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3265 #else 3266 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3267 start = start-nlocals; 3268 #endif 3269 3270 if (N_n) { /* compute total size of new subset if requested */ 3271 *N_n = start + nlocals; 3272 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3273 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3274 } 3275 3276 /* adapt root data with cumulative */ 3277 if (first_found) { 3278 PetscInt old_index; 3279 3280 root_data[first_index] += start; 3281 old_index = first_index; 3282 for (i=first_index+1;i<Nl;i++) { 3283 if (root_data[i]) { 3284 root_data[i] += root_data[old_index]; 3285 old_index = i; 3286 } 3287 } 3288 } 3289 3290 /* from roots to leaves */ 3291 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3292 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3293 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3294 3295 /* create new IS with global indexes without holes */ 3296 if (subset_mult) { 3297 const PetscInt* idxs_mult; 3298 PetscInt cum; 3299 3300 cum = 0; 3301 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3302 for (i=0;i<n;i++) { 3303 PetscInt j; 3304 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3305 } 3306 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3307 } else { 3308 for (i=0;i<n;i++) { 3309 gidxs[i] = leaf_data[i]-1; 3310 } 3311 } 3312 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3313 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3314 PetscFunctionReturn(0); 3315 } 3316 3317 #undef __FUNCT__ 3318 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3319 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3320 { 3321 PetscInt i,j; 3322 PetscScalar *alphas; 3323 PetscErrorCode ierr; 3324 3325 PetscFunctionBegin; 3326 /* this implements stabilized Gram-Schmidt */ 3327 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3328 for (i=0;i<n;i++) { 3329 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3330 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3331 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3332 } 3333 ierr = PetscFree(alphas);CHKERRQ(ierr); 3334 PetscFunctionReturn(0); 3335 } 3336 3337 #undef __FUNCT__ 3338 #define __FUNCT__ "MatISGetSubassemblingPattern" 3339 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3340 { 3341 IS ranks_send_to; 3342 PetscInt n_neighs,*neighs,*n_shared,**shared; 3343 PetscMPIInt size,rank,color; 3344 PetscInt *xadj,*adjncy; 3345 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3346 PetscInt i,local_size,threshold=0; 3347 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3348 PetscSubcomm subcomm; 3349 PetscErrorCode ierr; 3350 3351 PetscFunctionBegin; 3352 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3353 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3354 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3355 3356 /* Get info on mapping */ 3357 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3358 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3359 3360 /* build local CSR graph of subdomains' connectivity */ 3361 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3362 xadj[0] = 0; 3363 xadj[1] = PetscMax(n_neighs-1,0); 3364 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3365 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3366 3367 if (threshold) { 3368 PetscInt xadj_count = 0; 3369 for (i=1;i<n_neighs;i++) { 3370 if (n_shared[i] > threshold) { 3371 adjncy[xadj_count] = neighs[i]; 3372 adjncy_wgt[xadj_count] = n_shared[i]; 3373 xadj_count++; 3374 } 3375 } 3376 xadj[1] = xadj_count; 3377 } else { 3378 if (xadj[1]) { 3379 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3380 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3381 } 3382 } 3383 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3384 if (use_square) { 3385 for (i=0;i<xadj[1];i++) { 3386 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3387 } 3388 } 3389 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3390 3391 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3392 3393 /* 3394 Restrict work on active processes only. 3395 */ 3396 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3397 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3398 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3399 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3400 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3401 if (color) { 3402 ierr = PetscFree(xadj);CHKERRQ(ierr); 3403 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3404 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3405 } else { 3406 Mat subdomain_adj; 3407 IS new_ranks,new_ranks_contig; 3408 MatPartitioning partitioner; 3409 PetscInt prank,rstart=0,rend=0; 3410 PetscInt *is_indices,*oldranks; 3411 PetscBool aggregate; 3412 3413 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3414 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3415 prank = rank; 3416 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3417 /* 3418 for (i=0;i<size;i++) { 3419 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3420 } 3421 */ 3422 for (i=0;i<xadj[1];i++) { 3423 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3424 } 3425 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3426 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3427 if (aggregate) { 3428 PetscInt lrows,row,ncols,*cols; 3429 PetscMPIInt nrank; 3430 PetscScalar *vals; 3431 3432 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3433 lrows = 0; 3434 if (nrank<redprocs) { 3435 lrows = size/redprocs; 3436 if (nrank<size%redprocs) lrows++; 3437 } 3438 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3439 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3440 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3441 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3442 row = nrank; 3443 ncols = xadj[1]-xadj[0]; 3444 cols = adjncy; 3445 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3446 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3447 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3448 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3449 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3450 ierr = PetscFree(xadj);CHKERRQ(ierr); 3451 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3452 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3453 ierr = PetscFree(vals);CHKERRQ(ierr); 3454 } else { 3455 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3456 } 3457 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3458 3459 /* Partition */ 3460 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3461 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3462 if (use_vwgt) { 3463 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3464 v_wgt[0] = local_size; 3465 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3466 } 3467 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3468 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3469 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3470 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3471 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3472 3473 /* renumber new_ranks to avoid "holes" in new set of processors */ 3474 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3475 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3476 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3477 if (!redprocs) { 3478 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3479 } else { 3480 PetscInt idxs[1]; 3481 PetscMPIInt tag; 3482 MPI_Request *reqs; 3483 3484 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3485 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3486 for (i=rstart;i<rend;i++) { 3487 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3488 } 3489 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3490 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3491 ierr = PetscFree(reqs);CHKERRQ(ierr); 3492 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3493 } 3494 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3495 /* clean up */ 3496 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3497 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3498 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3499 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3500 } 3501 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3502 3503 /* assemble parallel IS for sends */ 3504 i = 1; 3505 if (color) i=0; 3506 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3507 /* get back IS */ 3508 *is_sends = ranks_send_to; 3509 PetscFunctionReturn(0); 3510 } 3511 3512 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3513 3514 #undef __FUNCT__ 3515 #define __FUNCT__ "MatISSubassemble" 3516 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3517 { 3518 Mat local_mat; 3519 IS is_sends_internal; 3520 PetscInt rows,cols,new_local_rows; 3521 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3522 PetscBool ismatis,isdense,newisdense,destroy_mat; 3523 ISLocalToGlobalMapping l2gmap; 3524 PetscInt* l2gmap_indices; 3525 const PetscInt* is_indices; 3526 MatType new_local_type; 3527 /* buffers */ 3528 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3529 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3530 PetscInt *recv_buffer_idxs_local; 3531 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3532 /* MPI */ 3533 MPI_Comm comm,comm_n; 3534 PetscSubcomm subcomm; 3535 PetscMPIInt n_sends,n_recvs,commsize; 3536 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3537 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3538 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3539 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3540 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3541 PetscErrorCode ierr; 3542 3543 PetscFunctionBegin; 3544 /* TODO: add missing checks */ 3545 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3546 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3547 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3548 PetscValidLogicalCollectiveInt(mat,nis,7); 3549 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3550 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3551 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3552 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3553 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3554 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3555 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3556 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3557 PetscInt mrows,mcols,mnrows,mncols; 3558 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3559 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3560 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3561 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3562 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3563 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3564 } 3565 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3566 PetscValidLogicalCollectiveInt(mat,bs,0); 3567 /* prepare IS for sending if not provided */ 3568 if (!is_sends) { 3569 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3570 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3571 } else { 3572 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3573 is_sends_internal = is_sends; 3574 } 3575 3576 /* get comm */ 3577 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3578 3579 /* compute number of sends */ 3580 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3581 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3582 3583 /* compute number of receives */ 3584 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3585 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3586 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3587 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3588 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3589 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3590 ierr = PetscFree(iflags);CHKERRQ(ierr); 3591 3592 /* restrict comm if requested */ 3593 subcomm = 0; 3594 destroy_mat = PETSC_FALSE; 3595 if (restrict_comm) { 3596 PetscMPIInt color,subcommsize; 3597 3598 color = 0; 3599 if (restrict_full) { 3600 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3601 } else { 3602 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3603 } 3604 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3605 subcommsize = commsize - subcommsize; 3606 /* check if reuse has been requested */ 3607 if (reuse == MAT_REUSE_MATRIX) { 3608 if (*mat_n) { 3609 PetscMPIInt subcommsize2; 3610 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3611 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3612 comm_n = PetscObjectComm((PetscObject)*mat_n); 3613 } else { 3614 comm_n = PETSC_COMM_SELF; 3615 } 3616 } else { /* MAT_INITIAL_MATRIX */ 3617 PetscMPIInt rank; 3618 3619 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3620 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3621 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3622 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3623 comm_n = PetscSubcommChild(subcomm); 3624 } 3625 /* flag to destroy *mat_n if not significative */ 3626 if (color) destroy_mat = PETSC_TRUE; 3627 } else { 3628 comm_n = comm; 3629 } 3630 3631 /* prepare send/receive buffers */ 3632 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3633 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3634 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3635 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3636 if (nis) { 3637 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3638 } 3639 3640 /* Get data from local matrices */ 3641 if (!isdense) { 3642 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3643 /* TODO: See below some guidelines on how to prepare the local buffers */ 3644 /* 3645 send_buffer_vals should contain the raw values of the local matrix 3646 send_buffer_idxs should contain: 3647 - MatType_PRIVATE type 3648 - PetscInt size_of_l2gmap 3649 - PetscInt global_row_indices[size_of_l2gmap] 3650 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3651 */ 3652 } else { 3653 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3654 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3655 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3656 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3657 send_buffer_idxs[1] = i; 3658 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3659 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3660 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3661 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3662 for (i=0;i<n_sends;i++) { 3663 ilengths_vals[is_indices[i]] = len*len; 3664 ilengths_idxs[is_indices[i]] = len+2; 3665 } 3666 } 3667 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3668 /* additional is (if any) */ 3669 if (nis) { 3670 PetscMPIInt psum; 3671 PetscInt j; 3672 for (j=0,psum=0;j<nis;j++) { 3673 PetscInt plen; 3674 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3675 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3676 psum += len+1; /* indices + lenght */ 3677 } 3678 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3679 for (j=0,psum=0;j<nis;j++) { 3680 PetscInt plen; 3681 const PetscInt *is_array_idxs; 3682 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3683 send_buffer_idxs_is[psum] = plen; 3684 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3685 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3686 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3687 psum += plen+1; /* indices + lenght */ 3688 } 3689 for (i=0;i<n_sends;i++) { 3690 ilengths_idxs_is[is_indices[i]] = psum; 3691 } 3692 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3693 } 3694 3695 buf_size_idxs = 0; 3696 buf_size_vals = 0; 3697 buf_size_idxs_is = 0; 3698 for (i=0;i<n_recvs;i++) { 3699 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3700 buf_size_vals += (PetscInt)olengths_vals[i]; 3701 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3702 } 3703 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3704 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3705 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3706 3707 /* get new tags for clean communications */ 3708 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3709 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3710 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3711 3712 /* allocate for requests */ 3713 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3714 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3715 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3716 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3717 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3718 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3719 3720 /* communications */ 3721 ptr_idxs = recv_buffer_idxs; 3722 ptr_vals = recv_buffer_vals; 3723 ptr_idxs_is = recv_buffer_idxs_is; 3724 for (i=0;i<n_recvs;i++) { 3725 source_dest = onodes[i]; 3726 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3727 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3728 ptr_idxs += olengths_idxs[i]; 3729 ptr_vals += olengths_vals[i]; 3730 if (nis) { 3731 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3732 ptr_idxs_is += olengths_idxs_is[i]; 3733 } 3734 } 3735 for (i=0;i<n_sends;i++) { 3736 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3737 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3738 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3739 if (nis) { 3740 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3741 } 3742 } 3743 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3744 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3745 3746 /* assemble new l2g map */ 3747 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3748 ptr_idxs = recv_buffer_idxs; 3749 new_local_rows = 0; 3750 for (i=0;i<n_recvs;i++) { 3751 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3752 ptr_idxs += olengths_idxs[i]; 3753 } 3754 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3755 ptr_idxs = recv_buffer_idxs; 3756 new_local_rows = 0; 3757 for (i=0;i<n_recvs;i++) { 3758 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3759 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3760 ptr_idxs += olengths_idxs[i]; 3761 } 3762 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3763 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3764 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3765 3766 /* infer new local matrix type from received local matrices type */ 3767 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3768 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3769 if (n_recvs) { 3770 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3771 ptr_idxs = recv_buffer_idxs; 3772 for (i=0;i<n_recvs;i++) { 3773 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3774 new_local_type_private = MATAIJ_PRIVATE; 3775 break; 3776 } 3777 ptr_idxs += olengths_idxs[i]; 3778 } 3779 switch (new_local_type_private) { 3780 case MATDENSE_PRIVATE: 3781 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3782 new_local_type = MATSEQAIJ; 3783 bs = 1; 3784 } else { /* if I receive only 1 dense matrix */ 3785 new_local_type = MATSEQDENSE; 3786 bs = 1; 3787 } 3788 break; 3789 case MATAIJ_PRIVATE: 3790 new_local_type = MATSEQAIJ; 3791 bs = 1; 3792 break; 3793 case MATBAIJ_PRIVATE: 3794 new_local_type = MATSEQBAIJ; 3795 break; 3796 case MATSBAIJ_PRIVATE: 3797 new_local_type = MATSEQSBAIJ; 3798 break; 3799 default: 3800 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3801 break; 3802 } 3803 } else { /* by default, new_local_type is seqdense */ 3804 new_local_type = MATSEQDENSE; 3805 bs = 1; 3806 } 3807 3808 /* create MATIS object if needed */ 3809 if (reuse == MAT_INITIAL_MATRIX) { 3810 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3811 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3812 } else { 3813 /* it also destroys the local matrices */ 3814 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3815 } 3816 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3817 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3818 3819 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3820 3821 /* Global to local map of received indices */ 3822 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3823 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3824 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3825 3826 /* restore attributes -> type of incoming data and its size */ 3827 buf_size_idxs = 0; 3828 for (i=0;i<n_recvs;i++) { 3829 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3830 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3831 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3832 } 3833 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3834 3835 /* set preallocation */ 3836 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3837 if (!newisdense) { 3838 PetscInt *new_local_nnz=0; 3839 3840 ptr_vals = recv_buffer_vals; 3841 ptr_idxs = recv_buffer_idxs_local; 3842 if (n_recvs) { 3843 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3844 } 3845 for (i=0;i<n_recvs;i++) { 3846 PetscInt j; 3847 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3848 for (j=0;j<*(ptr_idxs+1);j++) { 3849 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3850 } 3851 } else { 3852 /* TODO */ 3853 } 3854 ptr_idxs += olengths_idxs[i]; 3855 } 3856 if (new_local_nnz) { 3857 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3858 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3859 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3860 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3861 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3862 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3863 } else { 3864 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3865 } 3866 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3867 } else { 3868 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3869 } 3870 3871 /* set values */ 3872 ptr_vals = recv_buffer_vals; 3873 ptr_idxs = recv_buffer_idxs_local; 3874 for (i=0;i<n_recvs;i++) { 3875 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3876 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3877 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3878 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3879 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3880 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3881 } else { 3882 /* TODO */ 3883 } 3884 ptr_idxs += olengths_idxs[i]; 3885 ptr_vals += olengths_vals[i]; 3886 } 3887 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3888 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3889 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3890 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3891 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3892 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3893 3894 #if 0 3895 if (!restrict_comm) { /* check */ 3896 Vec lvec,rvec; 3897 PetscReal infty_error; 3898 3899 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3900 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3901 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3902 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3903 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3904 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3905 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3906 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3907 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3908 } 3909 #endif 3910 3911 /* assemble new additional is (if any) */ 3912 if (nis) { 3913 PetscInt **temp_idxs,*count_is,j,psum; 3914 3915 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3916 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3917 ptr_idxs = recv_buffer_idxs_is; 3918 psum = 0; 3919 for (i=0;i<n_recvs;i++) { 3920 for (j=0;j<nis;j++) { 3921 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3922 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3923 psum += plen; 3924 ptr_idxs += plen+1; /* shift pointer to received data */ 3925 } 3926 } 3927 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3928 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3929 for (i=1;i<nis;i++) { 3930 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3931 } 3932 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3933 ptr_idxs = recv_buffer_idxs_is; 3934 for (i=0;i<n_recvs;i++) { 3935 for (j=0;j<nis;j++) { 3936 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3937 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3938 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3939 ptr_idxs += plen+1; /* shift pointer to received data */ 3940 } 3941 } 3942 for (i=0;i<nis;i++) { 3943 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3944 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3945 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3946 } 3947 ierr = PetscFree(count_is);CHKERRQ(ierr); 3948 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3949 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3950 } 3951 /* free workspace */ 3952 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3953 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3954 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3955 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3956 if (isdense) { 3957 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3958 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3959 } else { 3960 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3961 } 3962 if (nis) { 3963 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3964 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3965 } 3966 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3967 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3968 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3969 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3970 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3971 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3972 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3973 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3974 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3975 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3976 ierr = PetscFree(onodes);CHKERRQ(ierr); 3977 if (nis) { 3978 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3979 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3980 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3981 } 3982 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3983 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3984 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3985 for (i=0;i<nis;i++) { 3986 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3987 } 3988 *mat_n = NULL; 3989 } 3990 PetscFunctionReturn(0); 3991 } 3992 3993 /* temporary hack into ksp private data structure */ 3994 #include <petsc/private/kspimpl.h> 3995 3996 #undef __FUNCT__ 3997 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3998 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3999 { 4000 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4001 PC_IS *pcis = (PC_IS*)pc->data; 4002 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4003 MatNullSpace CoarseNullSpace=NULL; 4004 ISLocalToGlobalMapping coarse_islg; 4005 IS coarse_is,*isarray; 4006 PetscInt i,im_active=-1,active_procs=-1; 4007 PetscInt nis,nisdofs,nisneu; 4008 PC pc_temp; 4009 PCType coarse_pc_type; 4010 KSPType coarse_ksp_type; 4011 PetscBool multilevel_requested,multilevel_allowed; 4012 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4013 Mat t_coarse_mat_is; 4014 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4015 PetscMPIInt all_procs; 4016 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4017 PetscBool compute_vecs = PETSC_FALSE; 4018 PetscScalar *array; 4019 PetscErrorCode ierr; 4020 4021 PetscFunctionBegin; 4022 /* Assign global numbering to coarse dofs */ 4023 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4024 PetscInt ocoarse_size; 4025 compute_vecs = PETSC_TRUE; 4026 ocoarse_size = pcbddc->coarse_size; 4027 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4028 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4029 /* see if we can avoid some work */ 4030 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4031 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4032 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4033 PC pc; 4034 PetscBool isbddc; 4035 4036 /* temporary workaround since PCBDDC does not have a reset method so far */ 4037 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4038 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4039 if (isbddc) { 4040 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4041 } 4042 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4043 coarse_reuse = PETSC_FALSE; 4044 } else { /* we can safely reuse already computed coarse matrix */ 4045 coarse_reuse = PETSC_TRUE; 4046 } 4047 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4048 coarse_reuse = PETSC_FALSE; 4049 } 4050 /* reset any subassembling information */ 4051 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4052 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4053 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4054 coarse_reuse = PETSC_TRUE; 4055 } 4056 4057 /* count "active" (i.e. with positive local size) and "void" processes */ 4058 im_active = !!(pcis->n); 4059 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4060 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4061 void_procs = all_procs-active_procs; 4062 csin_type_simple = PETSC_TRUE; 4063 redist = PETSC_FALSE; 4064 if (pcbddc->current_level && void_procs) { 4065 csin_ml = PETSC_TRUE; 4066 ncoarse_ml = void_procs; 4067 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4068 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4069 csin_ds = PETSC_TRUE; 4070 ncoarse_ds = pcbddc->redistribute_coarse; 4071 redist = PETSC_TRUE; 4072 } else { 4073 csin_ds = PETSC_TRUE; 4074 ncoarse_ds = active_procs; 4075 redist = PETSC_TRUE; 4076 } 4077 } else { 4078 csin_ml = PETSC_FALSE; 4079 ncoarse_ml = all_procs; 4080 if (void_procs) { 4081 csin_ds = PETSC_TRUE; 4082 ncoarse_ds = void_procs; 4083 csin_type_simple = PETSC_FALSE; 4084 } else { 4085 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4086 csin_ds = PETSC_TRUE; 4087 ncoarse_ds = pcbddc->redistribute_coarse; 4088 redist = PETSC_TRUE; 4089 } else { 4090 csin_ds = PETSC_FALSE; 4091 ncoarse_ds = all_procs; 4092 } 4093 } 4094 } 4095 4096 /* 4097 test if we can go multilevel: three conditions must be satisfied: 4098 - we have not exceeded the number of levels requested 4099 - we can actually subassemble the active processes 4100 - we can find a suitable number of MPI processes where we can place the subassembled problem 4101 */ 4102 multilevel_allowed = PETSC_FALSE; 4103 multilevel_requested = PETSC_FALSE; 4104 if (pcbddc->current_level < pcbddc->max_levels) { 4105 multilevel_requested = PETSC_TRUE; 4106 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4107 multilevel_allowed = PETSC_FALSE; 4108 } else { 4109 multilevel_allowed = PETSC_TRUE; 4110 } 4111 } 4112 /* determine number of process partecipating to coarse solver */ 4113 if (multilevel_allowed) { 4114 ncoarse = ncoarse_ml; 4115 csin = csin_ml; 4116 redist = PETSC_FALSE; 4117 } else { 4118 ncoarse = ncoarse_ds; 4119 csin = csin_ds; 4120 } 4121 4122 /* creates temporary l2gmap and IS for coarse indexes */ 4123 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4124 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4125 4126 /* creates temporary MATIS object for coarse matrix */ 4127 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4128 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4129 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4130 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4131 #if 0 4132 { 4133 PetscViewer viewer; 4134 char filename[256]; 4135 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4136 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4137 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4138 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4139 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4140 } 4141 #endif 4142 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4143 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4144 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4145 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4146 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4147 4148 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4149 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4150 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4151 const PetscInt *idxs; 4152 ISLocalToGlobalMapping tmap; 4153 4154 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4155 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4156 /* allocate space for temporary storage */ 4157 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4158 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4159 /* allocate for IS array */ 4160 nisdofs = pcbddc->n_ISForDofsLocal; 4161 nisneu = !!pcbddc->NeumannBoundariesLocal; 4162 nis = nisdofs + nisneu; 4163 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4164 /* dofs splitting */ 4165 for (i=0;i<nisdofs;i++) { 4166 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4167 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4168 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4169 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4170 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4171 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4172 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4173 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4174 } 4175 /* neumann boundaries */ 4176 if (pcbddc->NeumannBoundariesLocal) { 4177 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4178 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4179 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4180 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4181 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4182 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4183 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4184 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4185 } 4186 /* free memory */ 4187 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4188 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4189 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4190 } else { 4191 nis = 0; 4192 nisdofs = 0; 4193 nisneu = 0; 4194 isarray = NULL; 4195 } 4196 /* destroy no longer needed map */ 4197 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4198 4199 /* restrict on coarse candidates (if needed) */ 4200 coarse_mat_is = NULL; 4201 if (csin) { 4202 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4203 if (redist) { 4204 PetscMPIInt rank; 4205 PetscInt spc,n_spc_p1,dest[1],destsize; 4206 4207 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4208 spc = active_procs/ncoarse; 4209 n_spc_p1 = active_procs%ncoarse; 4210 if (im_active) { 4211 destsize = 1; 4212 if (rank > n_spc_p1*(spc+1)-1) { 4213 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4214 } else { 4215 dest[0] = rank/(spc+1); 4216 } 4217 } else { 4218 destsize = 0; 4219 } 4220 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4221 } else if (csin_type_simple) { 4222 PetscMPIInt rank; 4223 PetscInt issize,isidx; 4224 4225 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4226 if (im_active) { 4227 issize = 1; 4228 isidx = (PetscInt)rank; 4229 } else { 4230 issize = 0; 4231 isidx = -1; 4232 } 4233 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4234 } else { /* get a suitable subassembling pattern from MATIS code */ 4235 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4236 } 4237 4238 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4239 if (!redist || ncoarse <= void_procs) { 4240 PetscInt ncoarse_cand,tissize,*nisindices; 4241 PetscInt *coarse_candidates; 4242 const PetscInt* tisindices; 4243 4244 /* get coarse candidates' ranks in pc communicator */ 4245 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4246 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4247 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4248 if (!coarse_candidates[i]) { 4249 coarse_candidates[ncoarse_cand++]=i; 4250 } 4251 } 4252 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4253 4254 4255 if (pcbddc->dbg_flag) { 4256 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4257 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4258 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4259 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4260 for (i=0;i<ncoarse_cand;i++) { 4261 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4262 } 4263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4264 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4265 } 4266 /* shift the pattern on coarse candidates */ 4267 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4268 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4269 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4270 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4271 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4272 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4273 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4274 } 4275 if (pcbddc->dbg_flag) { 4276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4277 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4278 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4280 } 4281 } 4282 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4283 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4284 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4285 } else { /* this is the last level, so use just receiving processes in subcomm */ 4286 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4287 } 4288 } else { 4289 if (pcbddc->dbg_flag) { 4290 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4291 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4292 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4293 } 4294 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4295 coarse_mat_is = t_coarse_mat_is; 4296 } 4297 4298 /* create local to global scatters for coarse problem */ 4299 if (compute_vecs) { 4300 PetscInt lrows; 4301 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4302 if (coarse_mat_is) { 4303 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4304 } else { 4305 lrows = 0; 4306 } 4307 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4308 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4309 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4310 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4311 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4312 } 4313 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4314 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4315 4316 /* set defaults for coarse KSP and PC */ 4317 if (multilevel_allowed) { 4318 coarse_ksp_type = KSPRICHARDSON; 4319 coarse_pc_type = PCBDDC; 4320 } else { 4321 coarse_ksp_type = KSPPREONLY; 4322 coarse_pc_type = PCREDUNDANT; 4323 } 4324 4325 /* print some info if requested */ 4326 if (pcbddc->dbg_flag) { 4327 if (!multilevel_allowed) { 4328 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4329 if (multilevel_requested) { 4330 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4331 } else if (pcbddc->max_levels) { 4332 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4333 } 4334 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4335 } 4336 } 4337 4338 /* create the coarse KSP object only once with defaults */ 4339 if (coarse_mat_is) { 4340 MatReuse coarse_mat_reuse; 4341 PetscViewer dbg_viewer = NULL; 4342 if (pcbddc->dbg_flag) { 4343 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4344 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4345 } 4346 if (!pcbddc->coarse_ksp) { 4347 char prefix[256],str_level[16]; 4348 size_t len; 4349 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4350 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4351 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4352 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4353 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4354 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4355 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4356 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4357 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4358 /* prefix */ 4359 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4360 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4361 if (!pcbddc->current_level) { 4362 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4363 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4364 } else { 4365 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4366 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4367 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4368 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4369 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4370 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4371 } 4372 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4373 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4374 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4375 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4376 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4377 /* allow user customization */ 4378 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4379 } 4380 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4381 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4382 if (nisdofs) { 4383 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4384 for (i=0;i<nisdofs;i++) { 4385 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4386 } 4387 } 4388 if (nisneu) { 4389 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4390 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4391 } 4392 4393 /* get some info after set from options */ 4394 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4395 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4396 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4397 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4398 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4399 isbddc = PETSC_FALSE; 4400 } 4401 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4402 if (isredundant) { 4403 KSP inner_ksp; 4404 PC inner_pc; 4405 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4406 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4407 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4408 } 4409 4410 /* assemble coarse matrix */ 4411 if (coarse_reuse) { 4412 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4413 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4414 coarse_mat_reuse = MAT_REUSE_MATRIX; 4415 } else { 4416 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4417 } 4418 if (isbddc || isnn) { 4419 if (pcbddc->coarsening_ratio > 1) { 4420 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4421 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4422 if (pcbddc->dbg_flag) { 4423 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4424 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4425 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4426 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4427 } 4428 } 4429 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4430 } else { 4431 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4432 coarse_mat = coarse_mat_is; 4433 } 4434 } else { 4435 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4436 } 4437 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4438 4439 /* propagate symmetry info of coarse matrix */ 4440 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4441 if (pc->pmat->symmetric_set) { 4442 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4443 } 4444 if (pc->pmat->hermitian_set) { 4445 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4446 } 4447 if (pc->pmat->spd_set) { 4448 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4449 } 4450 /* set operators */ 4451 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4452 if (pcbddc->dbg_flag) { 4453 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4454 } 4455 } else { /* processes non partecipating to coarse solver (if any) */ 4456 coarse_mat = 0; 4457 } 4458 ierr = PetscFree(isarray);CHKERRQ(ierr); 4459 #if 0 4460 { 4461 PetscViewer viewer; 4462 char filename[256]; 4463 sprintf(filename,"coarse_mat.m"); 4464 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4465 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4466 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4467 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4468 } 4469 #endif 4470 4471 /* Compute coarse null space (special handling by BDDC only) */ 4472 #if 0 4473 if (pcbddc->NullSpace) { 4474 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4475 } 4476 #endif 4477 4478 if (pcbddc->coarse_ksp) { 4479 Vec crhs,csol; 4480 PetscBool ispreonly; 4481 4482 if (CoarseNullSpace) { 4483 if (isbddc) { 4484 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4485 } else { 4486 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4487 } 4488 } 4489 /* setup coarse ksp */ 4490 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4491 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4492 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4493 /* hack */ 4494 if (!csol) { 4495 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4496 } 4497 if (!crhs) { 4498 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4499 } 4500 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4501 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4502 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4503 KSP check_ksp; 4504 KSPType check_ksp_type; 4505 PC check_pc; 4506 Vec check_vec,coarse_vec; 4507 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4508 PetscInt its; 4509 PetscBool compute_eigs; 4510 PetscReal *eigs_r,*eigs_c; 4511 PetscInt neigs; 4512 const char *prefix; 4513 4514 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4515 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4516 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4517 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4518 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4519 if (ispreonly) { 4520 check_ksp_type = KSPPREONLY; 4521 compute_eigs = PETSC_FALSE; 4522 } else { 4523 check_ksp_type = KSPGMRES; 4524 compute_eigs = PETSC_TRUE; 4525 } 4526 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4527 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4528 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4529 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4530 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4531 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4532 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4533 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4534 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4535 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4536 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4537 /* create random vec */ 4538 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4539 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4540 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4541 if (CoarseNullSpace) { 4542 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4543 } 4544 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4545 /* solve coarse problem */ 4546 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4547 if (CoarseNullSpace) { 4548 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4549 } 4550 /* set eigenvalue estimation if preonly has not been requested */ 4551 if (compute_eigs) { 4552 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4553 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4554 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4555 lambda_max = eigs_r[neigs-1]; 4556 lambda_min = eigs_r[0]; 4557 if (pcbddc->use_coarse_estimates) { 4558 if (lambda_max>lambda_min) { 4559 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4560 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4561 } 4562 } 4563 } 4564 4565 /* check coarse problem residual error */ 4566 if (pcbddc->dbg_flag) { 4567 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4568 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4569 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4570 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4571 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4572 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4573 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4574 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4575 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4576 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4577 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4578 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4579 if (compute_eigs) { 4580 PetscReal lambda_max_s,lambda_min_s; 4581 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4582 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4583 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4584 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4585 for (i=0;i<neigs;i++) { 4586 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4587 } 4588 } 4589 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4590 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4591 } 4592 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4593 if (compute_eigs) { 4594 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4595 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4596 } 4597 } 4598 } 4599 /* print additional info */ 4600 if (pcbddc->dbg_flag) { 4601 /* waits until all processes reaches this point */ 4602 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4603 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4604 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4605 } 4606 4607 /* free memory */ 4608 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4609 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4610 PetscFunctionReturn(0); 4611 } 4612 4613 #undef __FUNCT__ 4614 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4615 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4616 { 4617 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4618 PC_IS* pcis = (PC_IS*)pc->data; 4619 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4620 IS subset,subset_mult,subset_n; 4621 PetscInt local_size,coarse_size=0; 4622 PetscInt *local_primal_indices=NULL; 4623 const PetscInt *t_local_primal_indices; 4624 PetscErrorCode ierr; 4625 4626 PetscFunctionBegin; 4627 /* Compute global number of coarse dofs */ 4628 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4629 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4630 } 4631 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4632 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4633 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4634 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4635 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4636 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4637 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4638 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4639 if (local_size != pcbddc->local_primal_size) { 4640 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4641 } 4642 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4643 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4644 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4645 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4646 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4647 4648 /* check numbering */ 4649 if (pcbddc->dbg_flag) { 4650 PetscScalar coarsesum,*array; 4651 PetscInt i; 4652 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4653 4654 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4655 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4656 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4657 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4658 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4659 for (i=0;i<pcbddc->local_primal_size;i++) { 4660 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4661 } 4662 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4663 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4664 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4665 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4666 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4667 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4668 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4669 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4670 for (i=0;i<pcis->n;i++) { 4671 if (array[i] == 1.0) { 4672 set_error = PETSC_TRUE; 4673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4674 } 4675 } 4676 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4677 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4678 for (i=0;i<pcis->n;i++) { 4679 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4680 } 4681 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4682 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4683 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4684 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4685 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4686 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4687 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4688 PetscInt *gidxs; 4689 4690 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4691 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4692 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4693 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4695 for (i=0;i<pcbddc->local_primal_size;i++) { 4696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4697 } 4698 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4699 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4700 } 4701 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4702 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4703 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4704 } 4705 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4706 /* get back data */ 4707 *coarse_size_n = coarse_size; 4708 *local_primal_indices_n = local_primal_indices; 4709 PetscFunctionReturn(0); 4710 } 4711 4712 #undef __FUNCT__ 4713 #define __FUNCT__ "PCBDDCGlobalToLocal" 4714 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4715 { 4716 IS localis_t; 4717 PetscInt i,lsize,*idxs,n; 4718 PetscScalar *vals; 4719 PetscErrorCode ierr; 4720 4721 PetscFunctionBegin; 4722 /* get indices in local ordering exploiting local to global map */ 4723 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4724 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4725 for (i=0;i<lsize;i++) vals[i] = 1.0; 4726 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4727 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4728 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4729 if (idxs) { /* multilevel guard */ 4730 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4731 } 4732 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4733 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4734 ierr = PetscFree(vals);CHKERRQ(ierr); 4735 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4736 /* now compute set in local ordering */ 4737 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4738 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4739 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4740 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4741 for (i=0,lsize=0;i<n;i++) { 4742 if (PetscRealPart(vals[i]) > 0.5) { 4743 lsize++; 4744 } 4745 } 4746 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4747 for (i=0,lsize=0;i<n;i++) { 4748 if (PetscRealPart(vals[i]) > 0.5) { 4749 idxs[lsize++] = i; 4750 } 4751 } 4752 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4753 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4754 *localis = localis_t; 4755 PetscFunctionReturn(0); 4756 } 4757 4758 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4759 #undef __FUNCT__ 4760 #define __FUNCT__ "PCBDDCMatMult_Private" 4761 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4762 { 4763 PCBDDCChange_ctx change_ctx; 4764 PetscErrorCode ierr; 4765 4766 PetscFunctionBegin; 4767 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4768 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4769 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4770 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4771 PetscFunctionReturn(0); 4772 } 4773 4774 #undef __FUNCT__ 4775 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4776 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4777 { 4778 PCBDDCChange_ctx change_ctx; 4779 PetscErrorCode ierr; 4780 4781 PetscFunctionBegin; 4782 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4783 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4784 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4785 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4786 PetscFunctionReturn(0); 4787 } 4788 4789 #undef __FUNCT__ 4790 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4791 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4792 { 4793 PC_IS *pcis=(PC_IS*)pc->data; 4794 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4795 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4796 Mat S_j; 4797 PetscInt *used_xadj,*used_adjncy; 4798 PetscBool free_used_adj; 4799 PetscErrorCode ierr; 4800 4801 PetscFunctionBegin; 4802 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4803 free_used_adj = PETSC_FALSE; 4804 if (pcbddc->sub_schurs_layers == -1) { 4805 used_xadj = NULL; 4806 used_adjncy = NULL; 4807 } else { 4808 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4809 used_xadj = pcbddc->mat_graph->xadj; 4810 used_adjncy = pcbddc->mat_graph->adjncy; 4811 } else if (pcbddc->computed_rowadj) { 4812 used_xadj = pcbddc->mat_graph->xadj; 4813 used_adjncy = pcbddc->mat_graph->adjncy; 4814 } else { 4815 PetscBool flg_row=PETSC_FALSE; 4816 const PetscInt *xadj,*adjncy; 4817 PetscInt nvtxs; 4818 4819 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4820 if (flg_row) { 4821 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4822 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4823 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4824 free_used_adj = PETSC_TRUE; 4825 } else { 4826 pcbddc->sub_schurs_layers = -1; 4827 used_xadj = NULL; 4828 used_adjncy = NULL; 4829 } 4830 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4831 } 4832 } 4833 4834 /* setup sub_schurs data */ 4835 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4836 if (!sub_schurs->use_mumps) { 4837 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4838 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4839 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4840 } else { 4841 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4842 PetscBool isseqaij; 4843 if (!pcbddc->use_vertices && reuse_solvers) { 4844 PetscInt n_vertices; 4845 4846 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4847 reuse_solvers = (PetscBool)!n_vertices; 4848 } 4849 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4850 if (!isseqaij) { 4851 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4852 if (matis->A == pcbddc->local_mat) { 4853 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4854 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4855 } else { 4856 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4857 } 4858 } 4859 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4860 } 4861 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4862 4863 /* free adjacency */ 4864 if (free_used_adj) { 4865 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4866 } 4867 PetscFunctionReturn(0); 4868 } 4869 4870 #undef __FUNCT__ 4871 #define __FUNCT__ "PCBDDCInitSubSchurs" 4872 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4873 { 4874 PC_IS *pcis=(PC_IS*)pc->data; 4875 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4876 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4877 PCBDDCGraph graph; 4878 PetscErrorCode ierr; 4879 4880 PetscFunctionBegin; 4881 /* attach interface graph for determining subsets */ 4882 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4883 IS verticesIS,verticescomm; 4884 PetscInt vsize,*idxs; 4885 4886 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4887 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4888 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4889 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4890 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4891 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4892 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4893 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4894 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4895 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4896 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4897 /* 4898 if (pcbddc->dbg_flag) { 4899 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4900 } 4901 */ 4902 } else { 4903 graph = pcbddc->mat_graph; 4904 } 4905 4906 /* sub_schurs init */ 4907 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4908 4909 /* free graph struct */ 4910 if (pcbddc->sub_schurs_rebuild) { 4911 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4912 } 4913 PetscFunctionReturn(0); 4914 } 4915