1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 29 30 if (pcbddc->dbg_flag) { 31 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 32 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 33 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 35 } 36 37 if (pcbddc->dbg_flag) { 38 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 39 } 40 41 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 42 43 /* max size of subsets */ 44 mss = 0; 45 for (i=0;i<sub_schurs->n_subs;i++) { 46 PetscInt subset_size; 47 48 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 49 mss = PetscMax(mss,subset_size); 50 } 51 52 /* min/max and threshold */ 53 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 54 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 55 nmax = PetscMax(nmin,nmax); 56 allocated_S_St = PETSC_FALSE; 57 if (nmin) { 58 allocated_S_St = PETSC_TRUE; 59 } 60 61 /* allocate lapack workspace */ 62 cum = cum2 = 0; 63 maxneigs = 0; 64 for (i=0;i<sub_schurs->n_subs;i++) { 65 PetscInt n,subset_size; 66 67 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 68 n = PetscMin(subset_size,nmax); 69 cum += subset_size; 70 cum2 += subset_size*n; 71 maxneigs = PetscMax(maxneigs,n); 72 } 73 if (mss) { 74 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 75 PetscBLASInt B_itype = 1; 76 PetscBLASInt B_N = mss; 77 PetscReal zero = 0.0; 78 PetscReal eps = 0.0; /* dlamch? */ 79 80 B_lwork = -1; 81 S = NULL; 82 St = NULL; 83 eigs = NULL; 84 eigv = NULL; 85 B_iwork = NULL; 86 B_ifail = NULL; 87 #if defined(PETSC_USE_COMPLEX) 88 rwork = NULL; 89 #endif 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 108 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 if (allocated_S_St) { 112 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 113 } 114 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 115 #if defined(PETSC_USE_COMPLEX) 116 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 117 #endif 118 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 119 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 121 nv+cum,&pcbddc->adaptive_constraints_idxs, 122 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 123 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 124 125 maxneigs = 0; 126 cum = cumarray = 0; 127 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 128 pcbddc->adaptive_constraints_data_ptr[0] = 0; 129 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 130 const PetscInt *idxs; 131 132 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 133 for (cum=0;cum<nv;cum++) { 134 pcbddc->adaptive_constraints_n[cum] = 1; 135 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 136 pcbddc->adaptive_constraints_data[cum] = 1.0; 137 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 138 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 139 } 140 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 141 } 142 143 if (mss) { /* multilevel */ 144 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 145 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 146 } 147 148 for (i=0;i<sub_schurs->n_subs;i++) { 149 150 const PetscInt *idxs; 151 PetscReal infty = PETSC_MAX_REAL; 152 PetscInt j,subset_size,eigs_start = 0; 153 PetscBLASInt B_N; 154 PetscBool same_data = PETSC_FALSE; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 158 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 159 if (sub_schurs->is_hermitian) { 160 PetscInt j,k; 161 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 162 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 163 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 } 165 for (j=0;j<subset_size;j++) { 166 for (k=j;k<subset_size;k++) { 167 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 168 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 169 } 170 } 171 } else { 172 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 173 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 174 } 175 } else { 176 S = Sarray + cumarray; 177 St = Starray + cumarray; 178 } 179 180 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 181 /* see if we can save some work */ 182 if (sub_schurs->n_subs == 1) { 183 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 184 } 185 186 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 187 B_neigs = 0; 188 } else { 189 /* Threshold: this is an heuristic for edges */ 190 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 191 192 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 193 PetscBLASInt B_itype = 1; 194 PetscBLASInt B_IL, B_IU; 195 PetscReal eps = -1.0; /* dlamch? */ 196 PetscInt nmin_s; 197 198 /* ask for eigenvalues larger than thresh */ 199 if (pcbddc->dbg_flag) { 200 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 201 } 202 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 203 #if defined(PETSC_USE_COMPLEX) 204 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 205 #else 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 207 #endif 208 ierr = PetscFPTrapPop();CHKERRQ(ierr); 209 if (B_ierr) { 210 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 211 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 212 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 213 } 214 215 if (B_neigs > nmax) { 216 if (pcbddc->dbg_flag) { 217 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 218 } 219 eigs_start = B_neigs -nmax; 220 B_neigs = nmax; 221 } 222 223 nmin_s = PetscMin(nmin,B_N); 224 if (B_neigs < nmin_s) { 225 PetscBLASInt B_neigs2; 226 227 B_IU = B_N - B_neigs; 228 B_IL = B_N - nmin_s + 1; 229 if (pcbddc->dbg_flag) { 230 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 231 } 232 if (sub_schurs->is_hermitian) { 233 PetscInt j; 234 for (j=0;j<subset_size;j++) { 235 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 236 } 237 for (j=0;j<subset_size;j++) { 238 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 239 } 240 } else { 241 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 242 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 } 244 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 245 #if defined(PETSC_USE_COMPLEX) 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 247 #else 248 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 249 #endif 250 ierr = PetscFPTrapPop();CHKERRQ(ierr); 251 B_neigs += B_neigs2; 252 } 253 if (B_ierr) { 254 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 255 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 257 } 258 if (pcbddc->dbg_flag) { 259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 260 for (j=0;j<B_neigs;j++) { 261 if (eigs[j] == 0.0) { 262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 263 } else { 264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 265 } 266 } 267 } 268 } else { 269 /* TODO */ 270 } 271 } 272 maxneigs = PetscMax(B_neigs,maxneigs); 273 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 274 if (B_neigs) { 275 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 276 277 if (pcbddc->dbg_flag > 1) { 278 PetscInt ii; 279 for (ii=0;ii<B_neigs;ii++) { 280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 281 for (j=0;j<B_N;j++) { 282 #if defined(PETSC_USE_COMPLEX) 283 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 284 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 286 #else 287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 288 #endif 289 } 290 } 291 } 292 #if 0 293 for (j=0;j<B_neigs;j++) { 294 PetscBLASInt Blas_N,Blas_one = 1.0; 295 PetscScalar norm; 296 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 297 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 298 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 299 if (pcbddc->adaptive_constraints_data[cum] > 0.0) { 300 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 301 } else { 302 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 303 } 304 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 305 } 306 #endif 307 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 308 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 309 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 310 cum++; 311 } 312 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 313 /* shift for next computation */ 314 cumarray += subset_size*subset_size; 315 } 316 if (pcbddc->dbg_flag) { 317 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 318 } 319 320 if (mss) { 321 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 322 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 323 /* destroy matrices (junk) */ 324 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 325 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 326 } 327 if (allocated_S_St) { 328 ierr = PetscFree2(S,St);CHKERRQ(ierr); 329 } 330 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 331 #if defined(PETSC_USE_COMPLEX) 332 ierr = PetscFree(rwork);CHKERRQ(ierr); 333 #endif 334 if (pcbddc->dbg_flag) { 335 PetscInt maxneigs_r; 336 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 337 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 338 } 339 PetscFunctionReturn(0); 340 } 341 342 #undef __FUNCT__ 343 #define __FUNCT__ "PCBDDCSetUpSolvers" 344 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 345 { 346 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 347 PetscScalar *coarse_submat_vals; 348 PetscErrorCode ierr; 349 350 PetscFunctionBegin; 351 /* Setup local scatters R_to_B and (optionally) R_to_D */ 352 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 353 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 354 355 /* Setup local neumann solver ksp_R */ 356 /* PCBDDCSetUpLocalScatters should be called first! */ 357 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 358 359 /* 360 Setup local correction and local part of coarse basis. 361 Gives back the dense local part of the coarse matrix in column major ordering 362 */ 363 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 364 365 /* Compute total number of coarse nodes and setup coarse solver */ 366 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 367 368 /* free */ 369 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 370 PetscFunctionReturn(0); 371 } 372 373 #undef __FUNCT__ 374 #define __FUNCT__ "PCBDDCResetCustomization" 375 PetscErrorCode PCBDDCResetCustomization(PC pc) 376 { 377 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 378 PetscErrorCode ierr; 379 380 PetscFunctionBegin; 381 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 382 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 383 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 385 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 387 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 388 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 389 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 390 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 391 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 392 PetscFunctionReturn(0); 393 } 394 395 #undef __FUNCT__ 396 #define __FUNCT__ "PCBDDCResetTopography" 397 PetscErrorCode PCBDDCResetTopography(PC pc) 398 { 399 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 400 PetscErrorCode ierr; 401 402 PetscFunctionBegin; 403 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 404 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 405 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 406 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 407 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "PCBDDCResetSolvers" 413 PetscErrorCode PCBDDCResetSolvers(PC pc) 414 { 415 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 416 PetscScalar *array; 417 PetscErrorCode ierr; 418 419 PetscFunctionBegin; 420 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 421 if (pcbddc->coarse_phi_B) { 422 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 423 ierr = PetscFree(array);CHKERRQ(ierr); 424 } 425 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 426 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 427 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 428 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 429 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 430 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 431 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 432 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 433 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 434 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 435 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 436 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 437 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 438 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 439 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 440 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 441 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 442 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 443 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 444 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 445 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 446 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 447 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 448 PetscFunctionReturn(0); 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 453 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 454 { 455 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 456 PC_IS *pcis = (PC_IS*)pc->data; 457 VecType impVecType; 458 PetscInt n_constraints,n_R,old_size; 459 PetscErrorCode ierr; 460 461 PetscFunctionBegin; 462 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 463 /* get sizes */ 464 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 465 n_R = pcis->n-pcbddc->n_vertices; 466 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 467 /* local work vectors (try to avoid unneeded work)*/ 468 /* R nodes */ 469 old_size = -1; 470 if (pcbddc->vec1_R) { 471 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 472 } 473 if (n_R != old_size) { 474 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 475 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 476 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 477 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 478 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 479 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 480 } 481 /* local primal dofs */ 482 old_size = -1; 483 if (pcbddc->vec1_P) { 484 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 485 } 486 if (pcbddc->local_primal_size != old_size) { 487 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 488 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 489 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 490 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 491 } 492 /* local explicit constraints */ 493 old_size = -1; 494 if (pcbddc->vec1_C) { 495 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 496 } 497 if (n_constraints && n_constraints != old_size) { 498 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 499 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 500 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 501 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 502 } 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "PCBDDCSetUpCorrection" 508 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 509 { 510 PetscErrorCode ierr; 511 /* pointers to pcis and pcbddc */ 512 PC_IS* pcis = (PC_IS*)pc->data; 513 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 514 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 515 /* submatrices of local problem */ 516 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 517 /* submatrices of local coarse problem */ 518 Mat S_VV,S_CV,S_VC,S_CC; 519 /* working matrices */ 520 Mat C_CR; 521 /* additional working stuff */ 522 PC pc_R; 523 Mat F; 524 PetscBool isLU,isCHOL,isILU; 525 526 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 527 PetscScalar *work; 528 PetscInt *idx_V_B; 529 PetscInt n,n_vertices,n_constraints; 530 PetscInt i,n_R,n_D,n_B; 531 PetscBool unsymmetric_check; 532 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 533 MatType impMatType; 534 /* some shortcuts to scalars */ 535 PetscScalar one=1.0,m_one=-1.0; 536 537 PetscFunctionBegin; 538 n_vertices = pcbddc->n_vertices; 539 n_constraints = pcbddc->local_primal_size-n_vertices; 540 /* Set Non-overlapping dimensions */ 541 n_B = pcis->n_B; 542 n_D = pcis->n - n_B; 543 n_R = pcis->n - n_vertices; 544 545 /* Set types for local objects needed by BDDC precondtioner */ 546 impMatType = MATSEQDENSE; 547 548 /* vertices in boundary numbering */ 549 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 550 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 551 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 552 553 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 554 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 555 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 556 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 557 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 558 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 560 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 563 564 unsymmetric_check = PETSC_FALSE; 565 /* allocate workspace */ 566 n = 0; 567 if (n_constraints) { 568 n += n_R*n_constraints; 569 } 570 if (n_vertices) { 571 n = PetscMax(2*n_R*n_vertices,n); 572 n = PetscMax((n_R+n_B)*n_vertices,n); 573 } 574 if (!pcbddc->symmetric_primal) { 575 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 576 unsymmetric_check = PETSC_TRUE; 577 } 578 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 579 580 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 581 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 582 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 583 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 584 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 585 if (isLU || isILU || isCHOL) { 586 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 587 } else if (sub_schurs->reuse_mumps) { 588 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 589 MatFactorType type; 590 591 F = reuse_mumps->F; 592 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 593 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 594 } else { 595 F = NULL; 596 } 597 598 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 599 if (n_constraints) { 600 Mat M1,M2,M3; 601 Mat auxmat; 602 IS is_aux; 603 PetscScalar *array,*array2; 604 605 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 606 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 607 608 /* Extract constraints on R nodes: C_{CR} */ 609 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 610 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 611 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 612 613 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 614 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 615 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 616 for (i=0;i<n_constraints;i++) { 617 const PetscScalar *row_cmat_values; 618 const PetscInt *row_cmat_indices; 619 PetscInt size_of_constraint,j; 620 621 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 622 for (j=0;j<size_of_constraint;j++) { 623 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 624 } 625 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 626 } 627 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 628 if (F) { 629 Mat B; 630 631 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 632 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 633 ierr = MatDestroy(&B);CHKERRQ(ierr); 634 } else { 635 PetscScalar *marr; 636 637 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 638 for (i=0;i<n_constraints;i++) { 639 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 640 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 641 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 642 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 643 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 644 } 645 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 646 } 647 if (!pcbddc->switch_static) { 648 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 649 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 650 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 651 for (i=0;i<n_constraints;i++) { 652 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 653 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 654 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 655 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 656 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 657 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 658 } 659 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 660 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 661 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 662 } else { 663 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 664 pcbddc->local_auxmat2 = local_auxmat2_R; 665 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 666 } 667 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 668 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 669 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 670 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 671 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 672 if (isCHOL) { 673 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 674 } else { 675 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 676 } 677 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 678 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 679 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 680 ierr = MatDestroy(&M2);CHKERRQ(ierr); 681 ierr = MatDestroy(&M3);CHKERRQ(ierr); 682 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 683 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 684 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 685 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 686 ierr = MatDestroy(&M1);CHKERRQ(ierr); 687 } 688 /* Get submatrices from subdomain matrix */ 689 if (n_vertices) { 690 IS is_aux; 691 692 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 693 IS tis; 694 695 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 696 ierr = ISSort(tis);CHKERRQ(ierr); 697 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 698 ierr = ISDestroy(&tis);CHKERRQ(ierr); 699 } else { 700 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 701 } 702 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 703 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 704 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 705 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 706 } 707 708 /* Matrix of coarse basis functions (local) */ 709 if (pcbddc->coarse_phi_B) { 710 PetscInt on_B,on_primal,on_D=n_D; 711 if (pcbddc->coarse_phi_D) { 712 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 713 } 714 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 715 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 716 PetscScalar *marray; 717 718 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 719 ierr = PetscFree(marray);CHKERRQ(ierr); 720 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 722 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 723 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 724 } 725 } 726 727 if (!pcbddc->coarse_phi_B) { 728 PetscScalar *marray; 729 730 n = n_B*pcbddc->local_primal_size; 731 if (pcbddc->switch_static || pcbddc->dbg_flag) { 732 n += n_D*pcbddc->local_primal_size; 733 } 734 if (!pcbddc->symmetric_primal) { 735 n *= 2; 736 } 737 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 739 n = n_B*pcbddc->local_primal_size; 740 if (pcbddc->switch_static || pcbddc->dbg_flag) { 741 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 742 n += n_D*pcbddc->local_primal_size; 743 } 744 if (!pcbddc->symmetric_primal) { 745 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 747 n = n_B*pcbddc->local_primal_size; 748 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 749 } 750 } else { 751 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 752 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 753 if (pcbddc->switch_static || pcbddc->dbg_flag) { 754 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 755 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 756 } 757 } 758 } 759 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 760 /* vertices */ 761 if (n_vertices) { 762 763 ierr = MatConvert(A_VV,impMatType,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 764 765 if (n_R) { 766 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 767 PetscBLASInt B_N,B_one = 1; 768 PetscScalar *x,*y; 769 PetscBool isseqaij; 770 771 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 772 ierr = MatConvert(A_RV,impMatType,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 773 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 774 if (F) { /* TODO could be optimized for symmetric problems */ 775 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 776 } else { 777 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 778 for (i=0;i<n_vertices;i++) { 779 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 780 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 781 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 782 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 783 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 784 } 785 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 786 } 787 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 788 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 789 if (n_constraints) { 790 Mat B; 791 792 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 793 for (i=0;i<n_vertices;i++) { 794 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 795 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 796 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 797 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 799 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 800 } 801 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 802 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 803 ierr = MatDestroy(&B);CHKERRQ(ierr); 804 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 805 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 806 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 807 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 808 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 809 ierr = MatDestroy(&B);CHKERRQ(ierr); 810 } 811 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 812 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 813 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 814 } 815 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 816 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 817 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 818 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 819 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 820 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 821 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 822 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 823 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 824 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 825 } else { 826 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 827 } 828 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 829 /* coarse basis functions */ 830 for (i=0;i<n_vertices;i++) { 831 PetscScalar *y; 832 833 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 834 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 835 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 836 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 837 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 838 y[n_B*i+idx_V_B[i]] = 1.0; 839 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 840 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 841 842 if (pcbddc->switch_static || pcbddc->dbg_flag) { 843 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 844 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 845 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 846 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 847 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 848 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 849 } 850 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 851 } 852 /* if n_R == 0 the object is not destroyed */ 853 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 854 } 855 856 if (n_constraints) { 857 Mat B; 858 859 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 860 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 861 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 862 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 863 if (n_vertices) { 864 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 865 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 866 } else { 867 Mat S_VCt; 868 869 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 870 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 871 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 872 } 873 } 874 ierr = MatDestroy(&B);CHKERRQ(ierr); 875 /* coarse basis functions */ 876 for (i=0;i<n_constraints;i++) { 877 PetscScalar *y; 878 879 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 880 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 881 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 882 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 883 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 884 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 885 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 886 if (pcbddc->switch_static || pcbddc->dbg_flag) { 887 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 888 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 889 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 891 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 892 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 893 } 894 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 895 } 896 } 897 if (n_constraints) { 898 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 899 } 900 901 /* compute other basis functions for non-symmetric problems */ 902 if (!pcbddc->symmetric_primal) { 903 904 if (n_constraints) { 905 Mat S_CCT,B_C; 906 907 /* this is a lazy thing */ 908 ierr = MatConvert(C_CR,impMatType,MAT_INPLACE_MATRIX,&C_CR);CHKERRQ(ierr); 909 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 910 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 911 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 912 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 913 if (n_vertices) { 914 Mat B_V,S_VCT; 915 916 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 917 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 918 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 919 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 920 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 921 } 922 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 923 } else { /* if there are no constraints, reset work */ 924 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 925 } 926 if (n_vertices && n_R) { 927 Mat A_VRT; 928 PetscScalar *marray; 929 PetscBLASInt B_N,B_one = 1; 930 931 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 932 ierr = MatConvert(A_VRT,impMatType,MAT_INPLACE_MATRIX,&A_VRT);CHKERRQ(ierr); 933 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 934 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 935 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 936 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 937 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 938 } 939 940 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 941 for (i=0;i<pcbddc->local_primal_size;i++) { 942 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 943 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 944 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 945 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 946 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 947 } 948 } else { 949 for (i=0;i<pcbddc->local_primal_size;i++) { 950 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 951 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 952 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 953 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 954 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 955 } 956 } 957 /* coarse basis functions */ 958 for (i=0;i<pcbddc->local_primal_size;i++) { 959 PetscScalar *y; 960 961 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 962 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 963 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 964 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 966 if (i<n_vertices) { 967 y[n_B*i+idx_V_B[i]] = 1.0; 968 } 969 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 970 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 971 972 if (pcbddc->switch_static || pcbddc->dbg_flag) { 973 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 974 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 975 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 978 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 979 } 980 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 981 } 982 } 983 /* free memory */ 984 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 985 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 986 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 987 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 988 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 989 ierr = PetscFree(work);CHKERRQ(ierr); 990 if (n_vertices) { 991 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 992 } 993 if (n_constraints) { 994 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 995 } 996 /* Checking coarse_sub_mat and coarse basis functios */ 997 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 998 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 999 if (pcbddc->dbg_flag) { 1000 Mat coarse_sub_mat; 1001 Mat AUXMAT,TM1,TM2,TM3,TM4; 1002 Mat coarse_phi_D,coarse_phi_B; 1003 Mat coarse_psi_D,coarse_psi_B; 1004 Mat A_II,A_BB,A_IB,A_BI; 1005 Mat C_B,CPHI; 1006 IS is_dummy; 1007 Vec mones; 1008 MatType checkmattype=MATSEQAIJ; 1009 PetscReal real_value; 1010 1011 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1012 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1013 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1014 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1015 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1016 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1017 if (unsymmetric_check) { 1018 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1019 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1020 } 1021 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1022 1023 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1024 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1025 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1026 if (unsymmetric_check) { 1027 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1028 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1029 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1030 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1031 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1032 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1033 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1034 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1035 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1036 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1037 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1038 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1039 } else { 1040 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1041 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1042 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1043 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1044 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1045 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1046 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1047 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1048 } 1049 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1050 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1051 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1052 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 1053 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1054 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1055 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1056 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1057 1058 /* check constraints */ 1059 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1060 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1061 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1062 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1063 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1064 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1065 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1066 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1067 if (unsymmetric_check) { 1068 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1069 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1070 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1071 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1073 } 1074 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1075 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1076 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1077 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1078 1079 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1080 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1081 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1082 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1083 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1084 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1085 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1086 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1087 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1088 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1089 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1090 if (unsymmetric_check) { 1091 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1092 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1093 } 1094 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1095 } 1096 /* get back data */ 1097 *coarse_submat_vals_n = coarse_submat_vals; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1103 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1104 { 1105 Mat *work_mat; 1106 IS isrow_s,iscol_s; 1107 PetscBool rsorted,csorted; 1108 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 1109 PetscErrorCode ierr; 1110 1111 PetscFunctionBegin; 1112 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1113 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1114 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1115 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1116 1117 if (!rsorted) { 1118 const PetscInt *idxs; 1119 PetscInt *idxs_sorted,i; 1120 1121 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1122 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1123 for (i=0;i<rsize;i++) { 1124 idxs_perm_r[i] = i; 1125 } 1126 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1127 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1128 for (i=0;i<rsize;i++) { 1129 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1130 } 1131 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1133 } else { 1134 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1135 isrow_s = isrow; 1136 } 1137 1138 if (!csorted) { 1139 if (isrow == iscol) { 1140 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1141 iscol_s = isrow_s; 1142 } else { 1143 const PetscInt *idxs; 1144 PetscInt *idxs_sorted,i; 1145 1146 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1147 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1148 for (i=0;i<csize;i++) { 1149 idxs_perm_c[i] = i; 1150 } 1151 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1152 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1153 for (i=0;i<csize;i++) { 1154 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1155 } 1156 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1157 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1158 } 1159 } else { 1160 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1161 iscol_s = iscol; 1162 } 1163 1164 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1165 1166 if (!rsorted || !csorted) { 1167 Mat new_mat; 1168 IS is_perm_r,is_perm_c; 1169 1170 if (!rsorted) { 1171 PetscInt *idxs_r,i; 1172 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1173 for (i=0;i<rsize;i++) { 1174 idxs_r[idxs_perm_r[i]] = i; 1175 } 1176 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1177 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1178 } else { 1179 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1180 } 1181 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1182 1183 if (!csorted) { 1184 if (isrow_s == iscol_s) { 1185 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1186 is_perm_c = is_perm_r; 1187 } else { 1188 PetscInt *idxs_c,i; 1189 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1190 for (i=0;i<csize;i++) { 1191 idxs_c[idxs_perm_c[i]] = i; 1192 } 1193 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1194 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1195 } 1196 } else { 1197 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1198 } 1199 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1200 1201 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1202 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1203 work_mat[0] = new_mat; 1204 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1205 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1206 } 1207 1208 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1209 *B = work_mat[0]; 1210 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1211 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1212 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1218 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1219 { 1220 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1221 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1222 Mat new_mat; 1223 IS is_local,is_global; 1224 PetscInt local_size; 1225 PetscBool isseqaij; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1230 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1231 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1232 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1233 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1234 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1235 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1236 1237 /* check */ 1238 if (pcbddc->dbg_flag) { 1239 Vec x,x_change; 1240 PetscReal error; 1241 1242 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1243 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1244 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1245 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1247 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1248 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1249 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1250 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1251 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1252 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1253 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1254 ierr = VecDestroy(&x);CHKERRQ(ierr); 1255 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1256 } 1257 1258 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1259 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1260 if (isseqaij) { 1261 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1262 } else { 1263 Mat work_mat; 1264 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1265 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1266 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1267 } 1268 if (matis->A->symmetric_set) { 1269 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1270 #if !defined(PETSC_USE_COMPLEX) 1271 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1272 #endif 1273 } 1274 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1280 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1281 { 1282 PC_IS* pcis = (PC_IS*)(pc->data); 1283 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1284 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1285 PetscInt *idx_R_local=NULL; 1286 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1287 PetscInt vbs,bs; 1288 PetscBT bitmask=NULL; 1289 PetscErrorCode ierr; 1290 1291 PetscFunctionBegin; 1292 /* 1293 No need to setup local scatters if 1294 - primal space is unchanged 1295 AND 1296 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1297 AND 1298 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1299 */ 1300 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1301 PetscFunctionReturn(0); 1302 } 1303 /* destroy old objects */ 1304 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1305 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1306 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1307 /* Set Non-overlapping dimensions */ 1308 n_B = pcis->n_B; 1309 n_D = pcis->n - n_B; 1310 n_vertices = pcbddc->n_vertices; 1311 1312 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1313 1314 /* create auxiliary bitmask and allocate workspace */ 1315 if (!sub_schurs->reuse_mumps) { 1316 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1317 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1318 for (i=0;i<n_vertices;i++) { 1319 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1320 } 1321 1322 for (i=0, n_R=0; i<pcis->n; i++) { 1323 if (!PetscBTLookup(bitmask,i)) { 1324 idx_R_local[n_R++] = i; 1325 } 1326 } 1327 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1328 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1329 1330 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1331 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1332 } 1333 1334 /* Block code */ 1335 vbs = 1; 1336 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1337 if (bs>1 && !(n_vertices%bs)) { 1338 PetscBool is_blocked = PETSC_TRUE; 1339 PetscInt *vary; 1340 if (!sub_schurs->reuse_mumps) { 1341 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1342 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1343 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1344 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1345 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1346 for (i=0; i<pcis->n/bs; i++) { 1347 if (vary[i]!=0 && vary[i]!=bs) { 1348 is_blocked = PETSC_FALSE; 1349 break; 1350 } 1351 } 1352 ierr = PetscFree(vary);CHKERRQ(ierr); 1353 } else { 1354 /* Verify directly the R set */ 1355 for (i=0; i<n_R/bs; i++) { 1356 PetscInt j,node=idx_R_local[bs*i]; 1357 for (j=1; j<bs; j++) { 1358 if (node != idx_R_local[bs*i+j]-j) { 1359 is_blocked = PETSC_FALSE; 1360 break; 1361 } 1362 } 1363 } 1364 } 1365 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1366 vbs = bs; 1367 for (i=0;i<n_R/vbs;i++) { 1368 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1369 } 1370 } 1371 } 1372 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1373 if (sub_schurs->reuse_mumps) { 1374 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1375 1376 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1377 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1378 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1379 reuse_mumps->is_R = pcbddc->is_R_local; 1380 } else { 1381 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1382 } 1383 1384 /* print some info if requested */ 1385 if (pcbddc->dbg_flag) { 1386 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1387 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1388 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1389 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1390 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1391 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1392 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1393 } 1394 1395 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1396 if (!sub_schurs->reuse_mumps) { 1397 IS is_aux1,is_aux2; 1398 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1399 1400 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1401 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1402 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1403 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1404 for (i=0; i<n_D; i++) { 1405 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1406 } 1407 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1408 for (i=0, j=0; i<n_R; i++) { 1409 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1410 aux_array1[j++] = i; 1411 } 1412 } 1413 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1414 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1415 for (i=0, j=0; i<n_B; i++) { 1416 if (!PetscBTLookup(bitmask,is_indices[i])) { 1417 aux_array2[j++] = i; 1418 } 1419 } 1420 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1421 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1422 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1423 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1424 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1425 1426 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1427 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1428 for (i=0, j=0; i<n_R; i++) { 1429 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1430 aux_array1[j++] = i; 1431 } 1432 } 1433 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1434 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1435 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1436 } 1437 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1438 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1439 } else { 1440 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1441 IS tis; 1442 PetscInt schur_size; 1443 1444 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1445 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1446 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1447 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1448 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1449 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1450 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1451 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1452 } 1453 } 1454 PetscFunctionReturn(0); 1455 } 1456 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1460 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1461 { 1462 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1463 PC_IS *pcis = (PC_IS*)pc->data; 1464 PC pc_temp; 1465 Mat A_RR; 1466 MatReuse reuse; 1467 PetscScalar m_one = -1.0; 1468 PetscReal value; 1469 PetscInt n_D,n_R; 1470 PetscBool use_exact,use_exact_reduced,issbaij; 1471 PetscErrorCode ierr; 1472 /* prefixes stuff */ 1473 char dir_prefix[256],neu_prefix[256],str_level[16]; 1474 size_t len; 1475 1476 PetscFunctionBegin; 1477 1478 /* compute prefixes */ 1479 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1480 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1481 if (!pcbddc->current_level) { 1482 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1483 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1484 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1485 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1486 } else { 1487 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1488 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1489 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1490 len -= 15; /* remove "pc_bddc_coarse_" */ 1491 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1492 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1493 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1494 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1495 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1496 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1497 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1498 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1499 } 1500 1501 /* DIRICHLET PROBLEM */ 1502 if (dirichlet) { 1503 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1504 if (pcbddc->local_mat->symmetric_set) { 1505 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1506 } 1507 /* Matrix for Dirichlet problem is pcis->A_II */ 1508 n_D = pcis->n - pcis->n_B; 1509 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1510 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1511 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1512 /* default */ 1513 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1514 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1515 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1516 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1517 if (issbaij) { 1518 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1519 } else { 1520 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1521 } 1522 /* Allow user's customization */ 1523 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1524 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1525 } 1526 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1527 if (sub_schurs->reuse_mumps) { 1528 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1529 1530 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1531 } 1532 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1533 if (!n_D) { 1534 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1535 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1536 } 1537 /* Set Up KSP for Dirichlet problem of BDDC */ 1538 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1539 /* set ksp_D into pcis data */ 1540 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1541 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1542 pcis->ksp_D = pcbddc->ksp_D; 1543 } 1544 1545 /* NEUMANN PROBLEM */ 1546 A_RR = 0; 1547 if (neumann) { 1548 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1549 PetscInt ibs,mbs; 1550 PetscBool issbaij; 1551 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1552 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1553 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1554 if (pcbddc->ksp_R) { /* already created ksp */ 1555 PetscInt nn_R; 1556 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1557 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1558 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1559 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1560 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1561 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1562 reuse = MAT_INITIAL_MATRIX; 1563 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1564 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1565 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1566 reuse = MAT_INITIAL_MATRIX; 1567 } else { /* safe to reuse the matrix */ 1568 reuse = MAT_REUSE_MATRIX; 1569 } 1570 } 1571 /* last check */ 1572 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1573 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1574 reuse = MAT_INITIAL_MATRIX; 1575 } 1576 } else { /* first time, so we need to create the matrix */ 1577 reuse = MAT_INITIAL_MATRIX; 1578 } 1579 /* extract A_RR */ 1580 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1581 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1582 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1583 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1584 if (matis->A == pcbddc->local_mat) { 1585 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1586 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1587 } else { 1588 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1589 } 1590 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1591 if (matis->A == pcbddc->local_mat) { 1592 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1593 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1594 } else { 1595 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1596 } 1597 } 1598 if (!sub_schurs->reuse_mumps) { 1599 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1600 if (pcbddc->local_mat->symmetric_set) { 1601 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1602 } 1603 } else { 1604 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1605 1606 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1607 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1608 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1609 } 1610 if (!pcbddc->ksp_R) { /* create object if not present */ 1611 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1612 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1613 /* default */ 1614 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1615 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1616 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1617 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1618 if (issbaij) { 1619 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1620 } else { 1621 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1622 } 1623 /* Allow user's customization */ 1624 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1625 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1626 } 1627 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1628 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1629 if (!n_R) { 1630 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1631 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1632 } 1633 /* Reuse MUMPS solver if it is present */ 1634 if (sub_schurs->reuse_mumps) { 1635 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1636 1637 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1638 } 1639 /* Set Up KSP for Neumann problem of BDDC */ 1640 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1641 } 1642 /* free Neumann problem's matrix */ 1643 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1644 1645 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1646 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1647 if (pcbddc->dbg_flag) { 1648 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1649 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1650 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1651 } 1652 if (dirichlet) { /* Dirichlet */ 1653 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1654 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1655 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1656 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1657 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1658 /* need to be adapted? */ 1659 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1660 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1661 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1662 /* print info */ 1663 if (pcbddc->dbg_flag) { 1664 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1665 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1666 } 1667 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1668 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1669 } 1670 } 1671 if (neumann) { /* Neumann */ 1672 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1673 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1674 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1675 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1676 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1677 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1678 /* need to be adapted? */ 1679 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1680 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1681 /* print info */ 1682 if (pcbddc->dbg_flag) { 1683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1684 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1685 } 1686 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1687 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1688 } 1689 } 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1696 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1697 { 1698 PetscErrorCode ierr; 1699 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1700 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1701 1702 PetscFunctionBegin; 1703 if (!sub_schurs->reuse_mumps) { 1704 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1705 } 1706 if (!pcbddc->switch_static) { 1707 if (applytranspose && pcbddc->local_auxmat1) { 1708 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1709 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1710 } 1711 if (!sub_schurs->reuse_mumps) { 1712 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1713 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1714 } else { 1715 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1716 1717 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1718 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1719 } 1720 } else { 1721 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1722 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1723 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1724 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1725 if (applytranspose && pcbddc->local_auxmat1) { 1726 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1727 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1728 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1729 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1730 } 1731 } 1732 if (!sub_schurs->reuse_mumps) { 1733 if (applytranspose) { 1734 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1735 } else { 1736 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1737 } 1738 } else { 1739 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1740 1741 if (applytranspose) { 1742 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1743 } else { 1744 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1745 } 1746 } 1747 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1748 if (!pcbddc->switch_static) { 1749 if (!sub_schurs->reuse_mumps) { 1750 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1751 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1752 } else { 1753 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1754 1755 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1756 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1757 } 1758 if (!applytranspose && pcbddc->local_auxmat1) { 1759 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1760 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1761 } 1762 } else { 1763 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1764 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1765 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1766 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1767 if (!applytranspose && pcbddc->local_auxmat1) { 1768 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1769 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1770 } 1771 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1772 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1773 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1774 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1775 } 1776 PetscFunctionReturn(0); 1777 } 1778 1779 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1782 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1783 { 1784 PetscErrorCode ierr; 1785 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1786 PC_IS* pcis = (PC_IS*) (pc->data); 1787 const PetscScalar zero = 0.0; 1788 1789 PetscFunctionBegin; 1790 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1791 if (applytranspose) { 1792 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1793 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1794 } else { 1795 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1796 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1797 } 1798 /* start communications from local primal nodes to rhs of coarse solver */ 1799 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1800 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1802 1803 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1804 /* TODO remove null space when doing multilevel */ 1805 if (pcbddc->coarse_ksp) { 1806 Vec rhs,sol; 1807 1808 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1809 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1810 if (applytranspose) { 1811 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1812 } else { 1813 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1814 } 1815 } 1816 1817 /* Local solution on R nodes */ 1818 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1819 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1820 } 1821 1822 /* communications from coarse sol to local primal nodes */ 1823 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1824 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1825 1826 /* Sum contributions from two levels */ 1827 if (applytranspose) { 1828 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1829 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1830 } else { 1831 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1832 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1833 } 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1839 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1840 { 1841 PetscErrorCode ierr; 1842 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1843 PetscScalar *array; 1844 Vec from,to; 1845 1846 PetscFunctionBegin; 1847 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1848 from = pcbddc->coarse_vec; 1849 to = pcbddc->vec1_P; 1850 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1851 Vec tvec; 1852 1853 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1854 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1855 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1856 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1857 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1858 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1859 } 1860 } else { /* from local to global -> put data in coarse right hand side */ 1861 from = pcbddc->vec1_P; 1862 to = pcbddc->coarse_vec; 1863 } 1864 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1865 PetscFunctionReturn(0); 1866 } 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1870 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1871 { 1872 PetscErrorCode ierr; 1873 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1874 PetscScalar *array; 1875 Vec from,to; 1876 1877 PetscFunctionBegin; 1878 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1879 from = pcbddc->coarse_vec; 1880 to = pcbddc->vec1_P; 1881 } else { /* from local to global -> put data in coarse right hand side */ 1882 from = pcbddc->vec1_P; 1883 to = pcbddc->coarse_vec; 1884 } 1885 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1886 if (smode == SCATTER_FORWARD) { 1887 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1888 Vec tvec; 1889 1890 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1891 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1892 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1893 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1894 } 1895 } else { 1896 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1897 ierr = VecResetArray(from);CHKERRQ(ierr); 1898 } 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 1903 /* uncomment for testing purposes */ 1904 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1905 #undef __FUNCT__ 1906 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1907 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1908 { 1909 PetscErrorCode ierr; 1910 PC_IS* pcis = (PC_IS*)(pc->data); 1911 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1912 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1913 /* one and zero */ 1914 PetscScalar one=1.0,zero=0.0; 1915 /* space to store constraints and their local indices */ 1916 PetscScalar *constraints_data; 1917 PetscInt *constraints_idxs,*constraints_idxs_B; 1918 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1919 PetscInt *constraints_n; 1920 /* iterators */ 1921 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1922 /* BLAS integers */ 1923 PetscBLASInt lwork,lierr; 1924 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1925 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1926 /* reuse */ 1927 PetscInt olocal_primal_size,olocal_primal_size_cc; 1928 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1929 /* change of basis */ 1930 PetscBool qr_needed; 1931 PetscBT change_basis,qr_needed_idx; 1932 /* auxiliary stuff */ 1933 PetscInt *nnz,*is_indices; 1934 PetscInt ncc; 1935 /* some quantities */ 1936 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1937 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1938 1939 PetscFunctionBegin; 1940 /* Destroy Mat objects computed previously */ 1941 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1942 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1943 /* save info on constraints from previous setup (if any) */ 1944 olocal_primal_size = pcbddc->local_primal_size; 1945 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1946 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1947 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1948 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1949 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1950 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1951 1952 /* print some info */ 1953 if (pcbddc->dbg_flag) { 1954 IS vertices; 1955 PetscInt nv,nedges,nfaces; 1956 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1957 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1958 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1959 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1960 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1961 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1962 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1963 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1964 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1965 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1966 } 1967 1968 if (!pcbddc->adaptive_selection) { 1969 IS ISForVertices,*ISForFaces,*ISForEdges; 1970 MatNullSpace nearnullsp; 1971 const Vec *nearnullvecs; 1972 Vec *localnearnullsp; 1973 PetscScalar *array; 1974 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1975 PetscBool nnsp_has_cnst; 1976 /* LAPACK working arrays for SVD or POD */ 1977 PetscBool skip_lapack,boolforchange; 1978 PetscScalar *work; 1979 PetscReal *singular_vals; 1980 #if defined(PETSC_USE_COMPLEX) 1981 PetscReal *rwork; 1982 #endif 1983 #if defined(PETSC_MISSING_LAPACK_GESVD) 1984 PetscScalar *temp_basis,*correlation_mat; 1985 #else 1986 PetscBLASInt dummy_int=1; 1987 PetscScalar dummy_scalar=1.; 1988 #endif 1989 1990 /* Get index sets for faces, edges and vertices from graph */ 1991 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1992 /* free unneeded index sets */ 1993 if (!pcbddc->use_vertices) { 1994 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1995 } 1996 if (!pcbddc->use_edges) { 1997 for (i=0;i<n_ISForEdges;i++) { 1998 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1999 } 2000 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2001 n_ISForEdges = 0; 2002 } 2003 if (!pcbddc->use_faces) { 2004 for (i=0;i<n_ISForFaces;i++) { 2005 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2006 } 2007 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2008 n_ISForFaces = 0; 2009 } 2010 2011 /* check if near null space is attached to global mat */ 2012 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2013 if (nearnullsp) { 2014 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2015 /* remove any stored info */ 2016 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2017 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2018 /* store information for BDDC solver reuse */ 2019 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2020 pcbddc->onearnullspace = nearnullsp; 2021 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2022 for (i=0;i<nnsp_size;i++) { 2023 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2024 } 2025 } else { /* if near null space is not provided BDDC uses constants by default */ 2026 nnsp_size = 0; 2027 nnsp_has_cnst = PETSC_TRUE; 2028 } 2029 /* get max number of constraints on a single cc */ 2030 max_constraints = nnsp_size; 2031 if (nnsp_has_cnst) max_constraints++; 2032 2033 /* 2034 Evaluate maximum storage size needed by the procedure 2035 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2036 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2037 There can be multiple constraints per connected component 2038 */ 2039 n_vertices = 0; 2040 if (ISForVertices) { 2041 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2042 } 2043 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2044 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2045 2046 total_counts = n_ISForFaces+n_ISForEdges; 2047 total_counts *= max_constraints; 2048 total_counts += n_vertices; 2049 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2050 2051 total_counts = 0; 2052 max_size_of_constraint = 0; 2053 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2054 IS used_is; 2055 if (i<n_ISForEdges) { 2056 used_is = ISForEdges[i]; 2057 } else { 2058 used_is = ISForFaces[i-n_ISForEdges]; 2059 } 2060 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2061 total_counts += j; 2062 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2063 } 2064 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2065 2066 /* get local part of global near null space vectors */ 2067 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2068 for (k=0;k<nnsp_size;k++) { 2069 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2070 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2071 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 } 2073 2074 /* whether or not to skip lapack calls */ 2075 skip_lapack = PETSC_TRUE; 2076 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2077 2078 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2079 if (!skip_lapack) { 2080 PetscScalar temp_work; 2081 2082 #if defined(PETSC_MISSING_LAPACK_GESVD) 2083 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2084 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2085 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2086 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2087 #if defined(PETSC_USE_COMPLEX) 2088 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2089 #endif 2090 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2091 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2092 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2093 lwork = -1; 2094 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2095 #if !defined(PETSC_USE_COMPLEX) 2096 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2097 #else 2098 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2099 #endif 2100 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2101 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2102 #else /* on missing GESVD */ 2103 /* SVD */ 2104 PetscInt max_n,min_n; 2105 max_n = max_size_of_constraint; 2106 min_n = max_constraints; 2107 if (max_size_of_constraint < max_constraints) { 2108 min_n = max_size_of_constraint; 2109 max_n = max_constraints; 2110 } 2111 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2112 #if defined(PETSC_USE_COMPLEX) 2113 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2114 #endif 2115 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2116 lwork = -1; 2117 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2118 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2119 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2120 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2121 #if !defined(PETSC_USE_COMPLEX) 2122 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2123 #else 2124 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2125 #endif 2126 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2127 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2128 #endif /* on missing GESVD */ 2129 /* Allocate optimal workspace */ 2130 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2131 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2132 } 2133 /* Now we can loop on constraining sets */ 2134 total_counts = 0; 2135 constraints_idxs_ptr[0] = 0; 2136 constraints_data_ptr[0] = 0; 2137 /* vertices */ 2138 if (n_vertices) { 2139 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2140 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2141 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2142 for (i=0;i<n_vertices;i++) { 2143 constraints_n[total_counts] = 1; 2144 constraints_data[total_counts] = 1.0; 2145 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2146 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2147 total_counts++; 2148 } 2149 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2150 PetscBool used_vertex; 2151 for (i=0;i<n_vertices;i++) { 2152 used_vertex = PETSC_FALSE; 2153 k = 0; 2154 while (!used_vertex && k<nnsp_size) { 2155 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2156 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2157 constraints_n[total_counts] = 1; 2158 constraints_idxs[total_counts] = is_indices[i]; 2159 constraints_data[total_counts] = 1.0; 2160 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2161 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2162 total_counts++; 2163 used_vertex = PETSC_TRUE; 2164 } 2165 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2166 k++; 2167 } 2168 } 2169 } 2170 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2171 n_vertices = total_counts; 2172 } 2173 2174 /* edges and faces */ 2175 total_counts_cc = total_counts; 2176 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2177 IS used_is; 2178 PetscBool idxs_copied = PETSC_FALSE; 2179 2180 if (ncc<n_ISForEdges) { 2181 used_is = ISForEdges[ncc]; 2182 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2183 } else { 2184 used_is = ISForFaces[ncc-n_ISForEdges]; 2185 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2186 } 2187 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2188 2189 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2190 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2191 /* change of basis should not be performed on local periodic nodes */ 2192 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2193 if (nnsp_has_cnst) { 2194 PetscScalar quad_value; 2195 2196 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2197 idxs_copied = PETSC_TRUE; 2198 2199 if (!pcbddc->use_nnsp_true) { 2200 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2201 } else { 2202 quad_value = 1.0; 2203 } 2204 for (j=0;j<size_of_constraint;j++) { 2205 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2206 } 2207 temp_constraints++; 2208 total_counts++; 2209 } 2210 for (k=0;k<nnsp_size;k++) { 2211 PetscReal real_value; 2212 PetscScalar *ptr_to_data; 2213 2214 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2215 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2216 for (j=0;j<size_of_constraint;j++) { 2217 ptr_to_data[j] = array[is_indices[j]]; 2218 } 2219 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2220 /* check if array is null on the connected component */ 2221 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2222 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2223 if (real_value > 0.0) { /* keep indices and values */ 2224 temp_constraints++; 2225 total_counts++; 2226 if (!idxs_copied) { 2227 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2228 idxs_copied = PETSC_TRUE; 2229 } 2230 } 2231 } 2232 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2233 valid_constraints = temp_constraints; 2234 if (!pcbddc->use_nnsp_true && temp_constraints) { 2235 if (temp_constraints == 1) { /* just normalize the constraint */ 2236 PetscScalar norm,*ptr_to_data; 2237 2238 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2239 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2240 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2241 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2242 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2243 } else { /* perform SVD */ 2244 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2245 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2246 2247 #if defined(PETSC_MISSING_LAPACK_GESVD) 2248 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2249 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2250 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2251 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2252 from that computed using LAPACKgesvd 2253 -> This is due to a different computation of eigenvectors in LAPACKheev 2254 -> The quality of the POD-computed basis will be the same */ 2255 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2256 /* Store upper triangular part of correlation matrix */ 2257 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2258 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2259 for (j=0;j<temp_constraints;j++) { 2260 for (k=0;k<j+1;k++) { 2261 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2262 } 2263 } 2264 /* compute eigenvalues and eigenvectors of correlation matrix */ 2265 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2266 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2267 #if !defined(PETSC_USE_COMPLEX) 2268 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2269 #else 2270 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2271 #endif 2272 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2273 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2274 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2275 j = 0; 2276 while (j < temp_constraints && singular_vals[j] < tol) j++; 2277 total_counts = total_counts-j; 2278 valid_constraints = temp_constraints-j; 2279 /* scale and copy POD basis into used quadrature memory */ 2280 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2281 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2282 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2283 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2284 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2285 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2286 if (j<temp_constraints) { 2287 PetscInt ii; 2288 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2289 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2290 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2291 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2292 for (k=0;k<temp_constraints-j;k++) { 2293 for (ii=0;ii<size_of_constraint;ii++) { 2294 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2295 } 2296 } 2297 } 2298 #else /* on missing GESVD */ 2299 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2300 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2301 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2302 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2303 #if !defined(PETSC_USE_COMPLEX) 2304 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2305 #else 2306 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2307 #endif 2308 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2309 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2310 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2311 k = temp_constraints; 2312 if (k > size_of_constraint) k = size_of_constraint; 2313 j = 0; 2314 while (j < k && singular_vals[k-j-1] < tol) j++; 2315 valid_constraints = k-j; 2316 total_counts = total_counts-temp_constraints+valid_constraints; 2317 #endif /* on missing GESVD */ 2318 } 2319 } 2320 /* update pointers information */ 2321 if (valid_constraints) { 2322 constraints_n[total_counts_cc] = valid_constraints; 2323 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2324 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2325 /* set change_of_basis flag */ 2326 if (boolforchange) { 2327 PetscBTSet(change_basis,total_counts_cc); 2328 } 2329 total_counts_cc++; 2330 } 2331 } 2332 /* free workspace */ 2333 if (!skip_lapack) { 2334 ierr = PetscFree(work);CHKERRQ(ierr); 2335 #if defined(PETSC_USE_COMPLEX) 2336 ierr = PetscFree(rwork);CHKERRQ(ierr); 2337 #endif 2338 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2339 #if defined(PETSC_MISSING_LAPACK_GESVD) 2340 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2341 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2342 #endif 2343 } 2344 for (k=0;k<nnsp_size;k++) { 2345 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2346 } 2347 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2348 /* free index sets of faces, edges and vertices */ 2349 for (i=0;i<n_ISForFaces;i++) { 2350 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2351 } 2352 if (n_ISForFaces) { 2353 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2354 } 2355 for (i=0;i<n_ISForEdges;i++) { 2356 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2357 } 2358 if (n_ISForEdges) { 2359 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2360 } 2361 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2362 } else { 2363 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2364 2365 total_counts = 0; 2366 n_vertices = 0; 2367 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2368 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2369 } 2370 max_constraints = 0; 2371 total_counts_cc = 0; 2372 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2373 total_counts += pcbddc->adaptive_constraints_n[i]; 2374 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2375 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2376 } 2377 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2378 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2379 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2380 constraints_data = pcbddc->adaptive_constraints_data; 2381 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2382 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2383 total_counts_cc = 0; 2384 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2385 if (pcbddc->adaptive_constraints_n[i]) { 2386 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2387 } 2388 } 2389 #if 0 2390 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2391 for (i=0;i<total_counts_cc;i++) { 2392 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2393 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2394 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2395 printf(" %d",constraints_idxs[j]); 2396 } 2397 printf("\n"); 2398 printf("number of cc: %d\n",constraints_n[i]); 2399 } 2400 for (i=0;i<n_vertices;i++) { 2401 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2402 } 2403 for (i=0;i<sub_schurs->n_subs;i++) { 2404 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2405 } 2406 #endif 2407 2408 max_size_of_constraint = 0; 2409 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2410 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2411 /* Change of basis */ 2412 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2413 if (pcbddc->use_change_of_basis) { 2414 for (i=0;i<sub_schurs->n_subs;i++) { 2415 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2416 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2417 } 2418 } 2419 } 2420 } 2421 pcbddc->local_primal_size = total_counts; 2422 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2423 2424 /* map constraints_idxs in boundary numbering */ 2425 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2426 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 2427 2428 /* Create constraint matrix */ 2429 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2430 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2431 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2432 2433 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2434 /* determine if a QR strategy is needed for change of basis */ 2435 qr_needed = PETSC_FALSE; 2436 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2437 total_primal_vertices=0; 2438 pcbddc->local_primal_size_cc = 0; 2439 for (i=0;i<total_counts_cc;i++) { 2440 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2441 if (size_of_constraint == 1) { 2442 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2443 pcbddc->local_primal_size_cc += 1; 2444 } else if (PetscBTLookup(change_basis,i)) { 2445 for (k=0;k<constraints_n[i];k++) { 2446 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2447 } 2448 pcbddc->local_primal_size_cc += constraints_n[i]; 2449 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2450 PetscBTSet(qr_needed_idx,i); 2451 qr_needed = PETSC_TRUE; 2452 } 2453 } else { 2454 pcbddc->local_primal_size_cc += 1; 2455 } 2456 } 2457 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2458 pcbddc->n_vertices = total_primal_vertices; 2459 /* permute indices in order to have a sorted set of vertices */ 2460 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2461 2462 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2463 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2464 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2465 2466 /* nonzero structure of constraint matrix */ 2467 /* and get reference dof for local constraints */ 2468 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2469 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2470 2471 j = total_primal_vertices; 2472 total_counts = total_primal_vertices; 2473 cum = total_primal_vertices; 2474 for (i=n_vertices;i<total_counts_cc;i++) { 2475 if (!PetscBTLookup(change_basis,i)) { 2476 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2477 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2478 cum++; 2479 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2480 for (k=0;k<constraints_n[i];k++) { 2481 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2482 nnz[j+k] = size_of_constraint; 2483 } 2484 j += constraints_n[i]; 2485 } 2486 } 2487 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2488 ierr = PetscFree(nnz);CHKERRQ(ierr); 2489 2490 /* set values in constraint matrix */ 2491 for (i=0;i<total_primal_vertices;i++) { 2492 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2493 } 2494 total_counts = total_primal_vertices; 2495 for (i=n_vertices;i<total_counts_cc;i++) { 2496 if (!PetscBTLookup(change_basis,i)) { 2497 PetscInt *cols; 2498 2499 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2500 cols = constraints_idxs+constraints_idxs_ptr[i]; 2501 for (k=0;k<constraints_n[i];k++) { 2502 PetscInt row = total_counts+k; 2503 PetscScalar *vals; 2504 2505 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2506 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2507 } 2508 total_counts += constraints_n[i]; 2509 } 2510 } 2511 /* assembling */ 2512 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2513 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2514 2515 /* 2516 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2517 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2518 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 2519 */ 2520 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2521 if (pcbddc->use_change_of_basis) { 2522 /* dual and primal dofs on a single cc */ 2523 PetscInt dual_dofs,primal_dofs; 2524 /* working stuff for GEQRF */ 2525 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2526 PetscBLASInt lqr_work; 2527 /* working stuff for UNGQR */ 2528 PetscScalar *gqr_work,lgqr_work_t; 2529 PetscBLASInt lgqr_work; 2530 /* working stuff for TRTRS */ 2531 PetscScalar *trs_rhs; 2532 PetscBLASInt Blas_NRHS; 2533 /* pointers for values insertion into change of basis matrix */ 2534 PetscInt *start_rows,*start_cols; 2535 PetscScalar *start_vals; 2536 /* working stuff for values insertion */ 2537 PetscBT is_primal; 2538 PetscInt *aux_primal_numbering_B; 2539 /* matrix sizes */ 2540 PetscInt global_size,local_size; 2541 /* temporary change of basis */ 2542 Mat localChangeOfBasisMatrix; 2543 /* extra space for debugging */ 2544 PetscScalar *dbg_work; 2545 2546 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2547 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2548 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2549 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2550 /* nonzeros for local mat */ 2551 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2552 for (i=0;i<pcis->n;i++) nnz[i]=1; 2553 for (i=n_vertices;i<total_counts_cc;i++) { 2554 if (PetscBTLookup(change_basis,i)) { 2555 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2556 if (PetscBTLookup(qr_needed_idx,i)) { 2557 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2558 } else { 2559 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2560 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2561 } 2562 } 2563 } 2564 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2565 ierr = PetscFree(nnz);CHKERRQ(ierr); 2566 /* Set initial identity in the matrix */ 2567 for (i=0;i<pcis->n;i++) { 2568 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2569 } 2570 2571 if (pcbddc->dbg_flag) { 2572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2573 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2574 } 2575 2576 2577 /* Now we loop on the constraints which need a change of basis */ 2578 /* 2579 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2580 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2581 2582 Basic blocks of change of basis matrix T computed by 2583 2584 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2585 2586 | 1 0 ... 0 s_1/S | 2587 | 0 1 ... 0 s_2/S | 2588 | ... | 2589 | 0 ... 1 s_{n-1}/S | 2590 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2591 2592 with S = \sum_{i=1}^n s_i^2 2593 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2594 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2595 2596 - QR decomposition of constraints otherwise 2597 */ 2598 if (qr_needed) { 2599 /* space to store Q */ 2600 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2601 /* first we issue queries for optimal work */ 2602 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2603 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2604 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2605 lqr_work = -1; 2606 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2607 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2608 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2609 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2610 lgqr_work = -1; 2611 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2612 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2613 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2614 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2615 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2616 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2617 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2618 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2619 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2620 /* array to store scaling factors for reflectors */ 2621 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2622 /* array to store rhs and solution of triangular solver */ 2623 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2624 /* allocating workspace for check */ 2625 if (pcbddc->dbg_flag) { 2626 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2627 } 2628 } 2629 /* array to store whether a node is primal or not */ 2630 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2631 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2632 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2633 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 2634 for (i=0;i<total_primal_vertices;i++) { 2635 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2636 } 2637 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2638 2639 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2640 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2641 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2642 if (PetscBTLookup(change_basis,total_counts)) { 2643 /* get constraint info */ 2644 primal_dofs = constraints_n[total_counts]; 2645 dual_dofs = size_of_constraint-primal_dofs; 2646 2647 if (pcbddc->dbg_flag) { 2648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2649 } 2650 2651 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2652 2653 /* copy quadrature constraints for change of basis check */ 2654 if (pcbddc->dbg_flag) { 2655 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2656 } 2657 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2658 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2659 2660 /* compute QR decomposition of constraints */ 2661 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2662 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2663 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2664 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2665 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2666 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2667 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2668 2669 /* explictly compute R^-T */ 2670 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2671 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2672 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2673 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2674 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2675 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2676 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2677 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2678 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2679 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2680 2681 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2682 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2683 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2684 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2685 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2686 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2687 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2688 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2689 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2690 2691 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2692 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2693 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2694 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2695 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2696 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2697 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2698 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2699 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2700 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2701 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2702 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2703 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2704 2705 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2706 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2707 /* insert cols for primal dofs */ 2708 for (j=0;j<primal_dofs;j++) { 2709 start_vals = &qr_basis[j*size_of_constraint]; 2710 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2711 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2712 } 2713 /* insert cols for dual dofs */ 2714 for (j=0,k=0;j<dual_dofs;k++) { 2715 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2716 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2717 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2718 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2719 j++; 2720 } 2721 } 2722 2723 /* check change of basis */ 2724 if (pcbddc->dbg_flag) { 2725 PetscInt ii,jj; 2726 PetscBool valid_qr=PETSC_TRUE; 2727 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2728 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2729 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2733 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2734 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2735 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2736 for (jj=0;jj<size_of_constraint;jj++) { 2737 for (ii=0;ii<primal_dofs;ii++) { 2738 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2739 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2740 } 2741 } 2742 if (!valid_qr) { 2743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2744 for (jj=0;jj<size_of_constraint;jj++) { 2745 for (ii=0;ii<primal_dofs;ii++) { 2746 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2747 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2748 } 2749 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2750 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2751 } 2752 } 2753 } 2754 } else { 2755 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2756 } 2757 } 2758 } else { /* simple transformation block */ 2759 PetscInt row,col; 2760 PetscScalar val,norm; 2761 2762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2763 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2764 for (j=0;j<size_of_constraint;j++) { 2765 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2766 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2767 if (!PetscBTLookup(is_primal,row_B)) { 2768 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2769 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2770 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2771 } else { 2772 for (k=0;k<size_of_constraint;k++) { 2773 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2774 if (row != col) { 2775 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2776 } else { 2777 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2778 } 2779 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2780 } 2781 } 2782 } 2783 if (pcbddc->dbg_flag) { 2784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2785 } 2786 } 2787 } else { 2788 if (pcbddc->dbg_flag) { 2789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2790 } 2791 } 2792 } 2793 2794 /* free workspace */ 2795 if (qr_needed) { 2796 if (pcbddc->dbg_flag) { 2797 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2798 } 2799 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2800 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2801 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2802 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2803 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2804 } 2805 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2806 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2807 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2808 2809 /* assembling of global change of variable */ 2810 { 2811 Mat tmat; 2812 PetscInt bs; 2813 2814 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2815 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2816 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2817 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2818 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2819 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2820 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2821 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2822 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2823 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2824 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2825 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2826 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2827 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2828 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2829 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2830 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2831 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2832 } 2833 /* check */ 2834 if (pcbddc->dbg_flag) { 2835 PetscReal error; 2836 Vec x,x_change; 2837 2838 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2839 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2840 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2841 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2842 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2843 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2845 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2846 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2847 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2848 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2849 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2850 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2851 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2852 ierr = VecDestroy(&x);CHKERRQ(ierr); 2853 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2854 } 2855 2856 /* adapt sub_schurs computed (if any) */ 2857 if (pcbddc->use_deluxe_scaling) { 2858 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2859 if (sub_schurs->S_Ej_all) { 2860 Mat S_new,tmat; 2861 IS is_all_N; 2862 2863 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2864 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2865 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2866 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2867 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2868 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2869 sub_schurs->S_Ej_all = S_new; 2870 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2871 if (sub_schurs->sum_S_Ej_all) { 2872 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2873 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2874 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2875 sub_schurs->sum_S_Ej_all = S_new; 2876 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2877 } 2878 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2879 } 2880 } 2881 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2882 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2883 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2884 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2885 } 2886 2887 /* set up change of basis context */ 2888 if (pcbddc->ChangeOfBasisMatrix) { 2889 PCBDDCChange_ctx change_ctx; 2890 2891 if (!pcbddc->new_global_mat) { 2892 PetscInt global_size,local_size; 2893 2894 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2895 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2896 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2897 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2898 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2899 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2900 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2901 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2902 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2903 } else { 2904 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2905 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2906 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2907 } 2908 if (!pcbddc->user_ChangeOfBasisMatrix) { 2909 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2910 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2911 } else { 2912 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2913 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2914 } 2915 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2916 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2917 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2918 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2919 } 2920 2921 /* check if a new primal space has been introduced */ 2922 pcbddc->new_primal_space_local = PETSC_TRUE; 2923 if (olocal_primal_size == pcbddc->local_primal_size) { 2924 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2925 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2926 if (!pcbddc->new_primal_space_local) { 2927 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2928 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2929 } 2930 } 2931 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2932 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2933 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2934 2935 /* flush dbg viewer */ 2936 if (pcbddc->dbg_flag) { 2937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2938 } 2939 2940 /* free workspace */ 2941 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2942 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2943 if (!pcbddc->adaptive_selection) { 2944 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2945 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2946 } else { 2947 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2948 pcbddc->adaptive_constraints_idxs_ptr, 2949 pcbddc->adaptive_constraints_data_ptr, 2950 pcbddc->adaptive_constraints_idxs, 2951 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2952 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2953 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2954 } 2955 PetscFunctionReturn(0); 2956 } 2957 2958 #undef __FUNCT__ 2959 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2960 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2961 { 2962 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2963 PC_IS *pcis = (PC_IS*)pc->data; 2964 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2965 PetscInt ierr,i,vertex_size,N; 2966 PetscViewer viewer=pcbddc->dbg_viewer; 2967 2968 PetscFunctionBegin; 2969 /* Reset previously computed graph */ 2970 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2971 /* Init local Graph struct */ 2972 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2973 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 2974 2975 /* Check validity of the csr graph passed in by the user */ 2976 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2977 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2978 } 2979 2980 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2981 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2982 PetscInt *xadj,*adjncy; 2983 PetscInt nvtxs; 2984 PetscBool flg_row=PETSC_FALSE; 2985 2986 if (pcbddc->use_local_adj) { 2987 2988 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2989 if (flg_row) { 2990 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2991 pcbddc->computed_rowadj = PETSC_TRUE; 2992 } 2993 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2994 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 2995 IS is_dummy; 2996 ISLocalToGlobalMapping l2gmap_dummy; 2997 PetscInt j,sum; 2998 PetscInt *cxadj,*cadjncy; 2999 const PetscInt *idxs; 3000 PCBDDCGraph graph; 3001 PetscBT is_on_boundary; 3002 3003 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3004 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3005 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3006 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3007 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3008 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3009 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3010 if (flg_row) { 3011 graph->xadj = xadj; 3012 graph->adjncy = adjncy; 3013 } 3014 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3015 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3016 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3017 3018 if (pcbddc->dbg_flag) { 3019 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3020 for (i=0;i<graph->ncc;i++) { 3021 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3022 } 3023 } 3024 3025 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3026 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3027 for (i=0;i<pcis->n_B;i++) { 3028 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3029 } 3030 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3031 3032 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3033 sum = 0; 3034 for (i=0;i<graph->ncc;i++) { 3035 PetscInt sizecc = 0; 3036 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3037 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3038 sizecc++; 3039 } 3040 } 3041 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3042 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3043 cxadj[graph->queue[j]] = sizecc; 3044 } 3045 } 3046 sum += sizecc*sizecc; 3047 } 3048 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3049 sum = 0; 3050 for (i=0;i<pcis->n;i++) { 3051 PetscInt temp = cxadj[i]; 3052 cxadj[i] = sum; 3053 sum += temp; 3054 } 3055 cxadj[pcis->n] = sum; 3056 for (i=0;i<graph->ncc;i++) { 3057 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3058 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3059 PetscInt k,sizecc = 0; 3060 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3061 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3062 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3063 sizecc++; 3064 } 3065 } 3066 } 3067 } 3068 } 3069 if (sum) { 3070 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3071 } else { 3072 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3073 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3074 } 3075 graph->xadj = 0; 3076 graph->adjncy = 0; 3077 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3078 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3079 } 3080 } 3081 if (pcbddc->dbg_flag) { 3082 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3083 } 3084 3085 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3086 vertex_size = 1; 3087 if (pcbddc->user_provided_isfordofs) { 3088 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3089 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3090 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3091 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3092 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3093 } 3094 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3095 pcbddc->n_ISForDofs = 0; 3096 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3097 } 3098 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3099 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3100 } else { 3101 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3102 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3103 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3104 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3105 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3106 } 3107 } 3108 } 3109 3110 /* Setup of Graph */ 3111 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3112 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3113 } 3114 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3115 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3116 } 3117 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3118 3119 /* Graph's connected components analysis */ 3120 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3121 3122 /* print some info to stdout */ 3123 if (pcbddc->dbg_flag) { 3124 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3125 } 3126 3127 /* mark topography has done */ 3128 pcbddc->recompute_topography = PETSC_FALSE; 3129 PetscFunctionReturn(0); 3130 } 3131 3132 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3133 #undef __FUNCT__ 3134 #define __FUNCT__ "PCBDDCSubsetNumbering" 3135 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3136 { 3137 PetscSF sf; 3138 PetscLayout map; 3139 const PetscInt *idxs; 3140 PetscInt *leaf_data,*root_data,*gidxs; 3141 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3142 PetscInt n_n,nlocals,start,first_index; 3143 PetscMPIInt commsize; 3144 PetscBool first_found; 3145 PetscErrorCode ierr; 3146 3147 PetscFunctionBegin; 3148 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3149 if (subset_mult) { 3150 PetscCheckSameComm(subset,1,subset_mult,2); 3151 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3152 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3153 } 3154 /* create workspace layout for computing global indices of subset */ 3155 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3156 lbounds[0] = lbounds[1] = 0; 3157 for (i=0;i<n;i++) { 3158 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3159 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3160 } 3161 lbounds[0] = -lbounds[0]; 3162 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3163 gbounds[0] = -gbounds[0]; 3164 N = gbounds[1] - gbounds[0] + 1; 3165 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3166 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3167 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3168 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3169 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3170 3171 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3172 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3173 if (subset_mult) { 3174 const PetscInt* idxs_mult; 3175 3176 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3177 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3178 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3179 } else { 3180 for (i=0;i<n;i++) leaf_data[i] = 1; 3181 } 3182 /* local size of new subset */ 3183 n_n = 0; 3184 for (i=0;i<n;i++) n_n += leaf_data[i]; 3185 3186 /* global indexes in layout */ 3187 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3188 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3189 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3190 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3191 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3192 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3193 3194 /* reduce from leaves to roots */ 3195 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3196 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3197 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3198 3199 /* count indexes in local part of layout */ 3200 nlocals = 0; 3201 first_index = -1; 3202 first_found = PETSC_FALSE; 3203 for (i=0;i<Nl;i++) { 3204 if (!first_found && root_data[i]) { 3205 first_found = PETSC_TRUE; 3206 first_index = i; 3207 } 3208 nlocals += root_data[i]; 3209 } 3210 3211 /* cumulative of number of indexes and size of subset without holes */ 3212 #if defined(PETSC_HAVE_MPI_EXSCAN) 3213 start = 0; 3214 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3215 #else 3216 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3217 start = start-nlocals; 3218 #endif 3219 3220 if (N_n) { /* compute total size of new subset if requested */ 3221 *N_n = start + nlocals; 3222 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3223 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3224 } 3225 3226 /* adapt root data with cumulative */ 3227 if (first_found) { 3228 PetscInt old_index; 3229 3230 root_data[first_index] += start; 3231 old_index = first_index; 3232 for (i=first_index+1;i<Nl;i++) { 3233 if (root_data[i]) { 3234 root_data[i] += root_data[old_index]; 3235 old_index = i; 3236 } 3237 } 3238 } 3239 3240 /* from roots to leaves */ 3241 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3242 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3243 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3244 3245 /* create new IS with global indexes without holes */ 3246 if (subset_mult) { 3247 const PetscInt* idxs_mult; 3248 PetscInt cum; 3249 3250 cum = 0; 3251 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3252 for (i=0;i<n;i++) { 3253 PetscInt j; 3254 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3255 } 3256 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3257 } else { 3258 for (i=0;i<n;i++) { 3259 gidxs[i] = leaf_data[i]-1; 3260 } 3261 } 3262 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3263 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3264 PetscFunctionReturn(0); 3265 } 3266 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3269 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3270 { 3271 PetscInt i,j; 3272 PetscScalar *alphas; 3273 PetscErrorCode ierr; 3274 3275 PetscFunctionBegin; 3276 /* this implements stabilized Gram-Schmidt */ 3277 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3278 for (i=0;i<n;i++) { 3279 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3280 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3281 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3282 } 3283 ierr = PetscFree(alphas);CHKERRQ(ierr); 3284 PetscFunctionReturn(0); 3285 } 3286 3287 #undef __FUNCT__ 3288 #define __FUNCT__ "MatISGetSubassemblingPattern" 3289 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3290 { 3291 IS ranks_send_to; 3292 PetscInt n_neighs,*neighs,*n_shared,**shared; 3293 PetscMPIInt size,rank,color; 3294 PetscInt *xadj,*adjncy; 3295 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3296 PetscInt i,local_size,threshold=0; 3297 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3298 PetscSubcomm subcomm; 3299 PetscErrorCode ierr; 3300 3301 PetscFunctionBegin; 3302 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3303 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3304 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3305 3306 /* Get info on mapping */ 3307 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3308 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3309 3310 /* build local CSR graph of subdomains' connectivity */ 3311 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3312 xadj[0] = 0; 3313 xadj[1] = PetscMax(n_neighs-1,0); 3314 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3315 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3316 3317 if (threshold) { 3318 PetscInt xadj_count = 0; 3319 for (i=1;i<n_neighs;i++) { 3320 if (n_shared[i] > threshold) { 3321 adjncy[xadj_count] = neighs[i]; 3322 adjncy_wgt[xadj_count] = n_shared[i]; 3323 xadj_count++; 3324 } 3325 } 3326 xadj[1] = xadj_count; 3327 } else { 3328 if (xadj[1]) { 3329 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3330 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3331 } 3332 } 3333 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3334 if (use_square) { 3335 for (i=0;i<xadj[1];i++) { 3336 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3337 } 3338 } 3339 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3340 3341 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3342 3343 /* 3344 Restrict work on active processes only. 3345 */ 3346 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3347 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3348 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3349 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3350 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3351 if (color) { 3352 ierr = PetscFree(xadj);CHKERRQ(ierr); 3353 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3354 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3355 } else { 3356 Mat subdomain_adj; 3357 IS new_ranks,new_ranks_contig; 3358 MatPartitioning partitioner; 3359 PetscInt prank,rstart=0,rend=0; 3360 PetscInt *is_indices,*oldranks; 3361 PetscBool aggregate; 3362 3363 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3364 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3365 prank = rank; 3366 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3367 /* 3368 for (i=0;i<size;i++) { 3369 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3370 } 3371 */ 3372 for (i=0;i<xadj[1];i++) { 3373 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3374 } 3375 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3376 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3377 if (aggregate) { 3378 PetscInt lrows,row,ncols,*cols; 3379 PetscMPIInt nrank; 3380 PetscScalar *vals; 3381 3382 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3383 lrows = 0; 3384 if (nrank<redprocs) { 3385 lrows = size/redprocs; 3386 if (nrank<size%redprocs) lrows++; 3387 } 3388 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3389 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3390 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3391 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3392 row = nrank; 3393 ncols = xadj[1]-xadj[0]; 3394 cols = adjncy; 3395 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3396 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3397 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3398 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3399 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3400 ierr = PetscFree(xadj);CHKERRQ(ierr); 3401 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3402 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3403 ierr = PetscFree(vals);CHKERRQ(ierr); 3404 } else { 3405 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3406 } 3407 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3408 3409 /* Partition */ 3410 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3411 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3412 if (use_vwgt) { 3413 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3414 v_wgt[0] = local_size; 3415 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3416 } 3417 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3418 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3419 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3420 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3421 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3422 3423 /* renumber new_ranks to avoid "holes" in new set of processors */ 3424 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3425 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3426 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3427 if (!redprocs) { 3428 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3429 } else { 3430 PetscInt idxs[1]; 3431 PetscMPIInt tag; 3432 MPI_Request *reqs; 3433 3434 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3435 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3436 for (i=rstart;i<rend;i++) { 3437 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3438 } 3439 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3440 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3441 ierr = PetscFree(reqs);CHKERRQ(ierr); 3442 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3443 } 3444 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3445 /* clean up */ 3446 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3447 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3448 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3449 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3450 } 3451 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3452 3453 /* assemble parallel IS for sends */ 3454 i = 1; 3455 if (color) i=0; 3456 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3457 /* get back IS */ 3458 *is_sends = ranks_send_to; 3459 PetscFunctionReturn(0); 3460 } 3461 3462 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3463 3464 #undef __FUNCT__ 3465 #define __FUNCT__ "MatISSubassemble" 3466 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3467 { 3468 Mat local_mat; 3469 IS is_sends_internal; 3470 PetscInt rows,cols,new_local_rows; 3471 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3472 PetscBool ismatis,isdense,newisdense,destroy_mat; 3473 ISLocalToGlobalMapping l2gmap; 3474 PetscInt* l2gmap_indices; 3475 const PetscInt* is_indices; 3476 MatType new_local_type; 3477 /* buffers */ 3478 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3479 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3480 PetscInt *recv_buffer_idxs_local; 3481 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3482 /* MPI */ 3483 MPI_Comm comm,comm_n; 3484 PetscSubcomm subcomm; 3485 PetscMPIInt n_sends,n_recvs,commsize; 3486 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3487 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3488 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3489 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3490 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3491 PetscErrorCode ierr; 3492 3493 PetscFunctionBegin; 3494 /* TODO: add missing checks */ 3495 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3496 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3497 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3498 PetscValidLogicalCollectiveInt(mat,nis,7); 3499 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3500 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3501 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3502 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3503 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3504 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3505 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3506 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3507 PetscInt mrows,mcols,mnrows,mncols; 3508 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3509 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3510 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3511 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3512 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3513 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3514 } 3515 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3516 PetscValidLogicalCollectiveInt(mat,bs,0); 3517 /* prepare IS for sending if not provided */ 3518 if (!is_sends) { 3519 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3520 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3521 } else { 3522 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3523 is_sends_internal = is_sends; 3524 } 3525 3526 /* get comm */ 3527 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3528 3529 /* compute number of sends */ 3530 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3531 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3532 3533 /* compute number of receives */ 3534 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3535 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3536 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3537 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3538 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3539 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3540 ierr = PetscFree(iflags);CHKERRQ(ierr); 3541 3542 /* restrict comm if requested */ 3543 subcomm = 0; 3544 destroy_mat = PETSC_FALSE; 3545 if (restrict_comm) { 3546 PetscMPIInt color,subcommsize; 3547 3548 color = 0; 3549 if (restrict_full) { 3550 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3551 } else { 3552 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3553 } 3554 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3555 subcommsize = commsize - subcommsize; 3556 /* check if reuse has been requested */ 3557 if (reuse == MAT_REUSE_MATRIX) { 3558 if (*mat_n) { 3559 PetscMPIInt subcommsize2; 3560 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3561 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3562 comm_n = PetscObjectComm((PetscObject)*mat_n); 3563 } else { 3564 comm_n = PETSC_COMM_SELF; 3565 } 3566 } else { /* MAT_INITIAL_MATRIX */ 3567 PetscMPIInt rank; 3568 3569 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3570 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3571 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3572 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3573 comm_n = PetscSubcommChild(subcomm); 3574 } 3575 /* flag to destroy *mat_n if not significative */ 3576 if (color) destroy_mat = PETSC_TRUE; 3577 } else { 3578 comm_n = comm; 3579 } 3580 3581 /* prepare send/receive buffers */ 3582 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3583 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3584 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3585 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3586 if (nis) { 3587 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3588 } 3589 3590 /* Get data from local matrices */ 3591 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3592 /* TODO: See below some guidelines on how to prepare the local buffers */ 3593 /* 3594 send_buffer_vals should contain the raw values of the local matrix 3595 send_buffer_idxs should contain: 3596 - MatType_PRIVATE type 3597 - PetscInt size_of_l2gmap 3598 - PetscInt global_row_indices[size_of_l2gmap] 3599 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3600 */ 3601 else { 3602 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3603 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3604 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3605 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3606 send_buffer_idxs[1] = i; 3607 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3608 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3609 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3610 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3611 for (i=0;i<n_sends;i++) { 3612 ilengths_vals[is_indices[i]] = len*len; 3613 ilengths_idxs[is_indices[i]] = len+2; 3614 } 3615 } 3616 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3617 /* additional is (if any) */ 3618 if (nis) { 3619 PetscMPIInt psum; 3620 PetscInt j; 3621 for (j=0,psum=0;j<nis;j++) { 3622 PetscInt plen; 3623 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3624 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3625 psum += len+1; /* indices + lenght */ 3626 } 3627 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3628 for (j=0,psum=0;j<nis;j++) { 3629 PetscInt plen; 3630 const PetscInt *is_array_idxs; 3631 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3632 send_buffer_idxs_is[psum] = plen; 3633 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3634 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3635 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3636 psum += plen+1; /* indices + lenght */ 3637 } 3638 for (i=0;i<n_sends;i++) { 3639 ilengths_idxs_is[is_indices[i]] = psum; 3640 } 3641 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3642 } 3643 3644 buf_size_idxs = 0; 3645 buf_size_vals = 0; 3646 buf_size_idxs_is = 0; 3647 for (i=0;i<n_recvs;i++) { 3648 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3649 buf_size_vals += (PetscInt)olengths_vals[i]; 3650 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3651 } 3652 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3653 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3654 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3655 3656 /* get new tags for clean communications */ 3657 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3658 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3659 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3660 3661 /* allocate for requests */ 3662 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3663 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3664 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3665 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3666 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3667 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3668 3669 /* communications */ 3670 ptr_idxs = recv_buffer_idxs; 3671 ptr_vals = recv_buffer_vals; 3672 ptr_idxs_is = recv_buffer_idxs_is; 3673 for (i=0;i<n_recvs;i++) { 3674 source_dest = onodes[i]; 3675 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3676 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3677 ptr_idxs += olengths_idxs[i]; 3678 ptr_vals += olengths_vals[i]; 3679 if (nis) { 3680 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3681 ptr_idxs_is += olengths_idxs_is[i]; 3682 } 3683 } 3684 for (i=0;i<n_sends;i++) { 3685 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3686 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3687 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3688 if (nis) { 3689 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3690 } 3691 } 3692 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3693 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3694 3695 /* assemble new l2g map */ 3696 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3697 ptr_idxs = recv_buffer_idxs; 3698 new_local_rows = 0; 3699 for (i=0;i<n_recvs;i++) { 3700 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3701 ptr_idxs += olengths_idxs[i]; 3702 } 3703 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3704 ptr_idxs = recv_buffer_idxs; 3705 new_local_rows = 0; 3706 for (i=0;i<n_recvs;i++) { 3707 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3708 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3709 ptr_idxs += olengths_idxs[i]; 3710 } 3711 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3712 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3713 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3714 3715 /* infer new local matrix type from received local matrices type */ 3716 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3717 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3718 if (n_recvs) { 3719 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3720 ptr_idxs = recv_buffer_idxs; 3721 for (i=0;i<n_recvs;i++) { 3722 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3723 new_local_type_private = MATAIJ_PRIVATE; 3724 break; 3725 } 3726 ptr_idxs += olengths_idxs[i]; 3727 } 3728 switch (new_local_type_private) { 3729 case MATDENSE_PRIVATE: 3730 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3731 new_local_type = MATSEQAIJ; 3732 bs = 1; 3733 } else { /* if I receive only 1 dense matrix */ 3734 new_local_type = MATSEQDENSE; 3735 bs = 1; 3736 } 3737 break; 3738 case MATAIJ_PRIVATE: 3739 new_local_type = MATSEQAIJ; 3740 bs = 1; 3741 break; 3742 case MATBAIJ_PRIVATE: 3743 new_local_type = MATSEQBAIJ; 3744 break; 3745 case MATSBAIJ_PRIVATE: 3746 new_local_type = MATSEQSBAIJ; 3747 break; 3748 default: 3749 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3750 break; 3751 } 3752 } else { /* by default, new_local_type is seqdense */ 3753 new_local_type = MATSEQDENSE; 3754 bs = 1; 3755 } 3756 3757 /* create MATIS object if needed */ 3758 if (reuse == MAT_INITIAL_MATRIX) { 3759 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3760 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3761 } else { 3762 /* it also destroys the local matrices */ 3763 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3764 } 3765 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3766 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3767 3768 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3769 3770 /* Global to local map of received indices */ 3771 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3772 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3773 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3774 3775 /* restore attributes -> type of incoming data and its size */ 3776 buf_size_idxs = 0; 3777 for (i=0;i<n_recvs;i++) { 3778 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3779 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3780 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3781 } 3782 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3783 3784 /* set preallocation */ 3785 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3786 if (!newisdense) { 3787 PetscInt *new_local_nnz=0; 3788 3789 ptr_idxs = recv_buffer_idxs_local; 3790 if (n_recvs) { 3791 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3792 } 3793 for (i=0;i<n_recvs;i++) { 3794 PetscInt j; 3795 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3796 for (j=0;j<*(ptr_idxs+1);j++) { 3797 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3798 } 3799 } else { 3800 /* TODO */ 3801 } 3802 ptr_idxs += olengths_idxs[i]; 3803 } 3804 if (new_local_nnz) { 3805 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3806 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3807 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3808 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3809 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3810 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3811 } else { 3812 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3813 } 3814 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3815 } else { 3816 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3817 } 3818 3819 /* set values */ 3820 ptr_vals = recv_buffer_vals; 3821 ptr_idxs = recv_buffer_idxs_local; 3822 for (i=0;i<n_recvs;i++) { 3823 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3824 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3825 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3826 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3827 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3828 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3829 } else { 3830 /* TODO */ 3831 } 3832 ptr_idxs += olengths_idxs[i]; 3833 ptr_vals += olengths_vals[i]; 3834 } 3835 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3836 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3837 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3838 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3839 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3840 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3841 3842 #if 0 3843 if (!restrict_comm) { /* check */ 3844 Vec lvec,rvec; 3845 PetscReal infty_error; 3846 3847 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3848 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3849 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3850 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3851 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3852 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3853 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3854 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3855 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3856 } 3857 #endif 3858 3859 /* assemble new additional is (if any) */ 3860 if (nis) { 3861 PetscInt **temp_idxs,*count_is,j,psum; 3862 3863 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3864 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3865 ptr_idxs = recv_buffer_idxs_is; 3866 psum = 0; 3867 for (i=0;i<n_recvs;i++) { 3868 for (j=0;j<nis;j++) { 3869 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3870 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3871 psum += plen; 3872 ptr_idxs += plen+1; /* shift pointer to received data */ 3873 } 3874 } 3875 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3876 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3877 for (i=1;i<nis;i++) { 3878 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3879 } 3880 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3881 ptr_idxs = recv_buffer_idxs_is; 3882 for (i=0;i<n_recvs;i++) { 3883 for (j=0;j<nis;j++) { 3884 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3885 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3886 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3887 ptr_idxs += plen+1; /* shift pointer to received data */ 3888 } 3889 } 3890 for (i=0;i<nis;i++) { 3891 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3892 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3893 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3894 } 3895 ierr = PetscFree(count_is);CHKERRQ(ierr); 3896 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3897 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3898 } 3899 /* free workspace */ 3900 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3901 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3902 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3903 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3904 if (isdense) { 3905 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3906 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3907 } else { 3908 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3909 } 3910 if (nis) { 3911 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3912 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3913 } 3914 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3915 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3916 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3917 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3918 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3919 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3920 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3921 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3922 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3923 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3924 ierr = PetscFree(onodes);CHKERRQ(ierr); 3925 if (nis) { 3926 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3927 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3928 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3929 } 3930 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3931 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3932 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3933 for (i=0;i<nis;i++) { 3934 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3935 } 3936 *mat_n = NULL; 3937 } 3938 PetscFunctionReturn(0); 3939 } 3940 3941 /* temporary hack into ksp private data structure */ 3942 #include <petsc/private/kspimpl.h> 3943 3944 #undef __FUNCT__ 3945 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3946 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3947 { 3948 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3949 PC_IS *pcis = (PC_IS*)pc->data; 3950 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3951 MatNullSpace CoarseNullSpace=NULL; 3952 ISLocalToGlobalMapping coarse_islg; 3953 IS coarse_is,*isarray; 3954 PetscInt i,im_active=-1,active_procs=-1; 3955 PetscInt nis,nisdofs,nisneu; 3956 PC pc_temp; 3957 PCType coarse_pc_type; 3958 KSPType coarse_ksp_type; 3959 PetscBool multilevel_requested,multilevel_allowed; 3960 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3961 Mat t_coarse_mat_is; 3962 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3963 PetscMPIInt all_procs; 3964 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3965 PetscBool compute_vecs = PETSC_FALSE; 3966 PetscScalar *array; 3967 PetscErrorCode ierr; 3968 3969 PetscFunctionBegin; 3970 /* Assign global numbering to coarse dofs */ 3971 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3972 PetscInt ocoarse_size; 3973 compute_vecs = PETSC_TRUE; 3974 ocoarse_size = pcbddc->coarse_size; 3975 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3976 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3977 /* see if we can avoid some work */ 3978 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3979 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 3980 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 3981 PC pc; 3982 PetscBool isbddc; 3983 3984 /* temporary workaround since PCBDDC does not have a reset method so far */ 3985 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 3986 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3987 if (isbddc) { 3988 ierr = PCDestroy(&pc);CHKERRQ(ierr); 3989 } 3990 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3991 coarse_reuse = PETSC_FALSE; 3992 } else { /* we can safely reuse already computed coarse matrix */ 3993 coarse_reuse = PETSC_TRUE; 3994 } 3995 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3996 coarse_reuse = PETSC_FALSE; 3997 } 3998 /* reset any subassembling information */ 3999 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4000 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4001 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4002 coarse_reuse = PETSC_TRUE; 4003 } 4004 4005 /* count "active" (i.e. with positive local size) and "void" processes */ 4006 im_active = !!(pcis->n); 4007 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4008 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4009 void_procs = all_procs-active_procs; 4010 csin_type_simple = PETSC_TRUE; 4011 redist = PETSC_FALSE; 4012 if (pcbddc->current_level && void_procs) { 4013 csin_ml = PETSC_TRUE; 4014 ncoarse_ml = void_procs; 4015 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4016 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4017 csin_ds = PETSC_TRUE; 4018 ncoarse_ds = pcbddc->redistribute_coarse; 4019 redist = PETSC_TRUE; 4020 } else { 4021 csin_ds = PETSC_TRUE; 4022 ncoarse_ds = active_procs; 4023 redist = PETSC_TRUE; 4024 } 4025 } else { 4026 csin_ml = PETSC_FALSE; 4027 ncoarse_ml = all_procs; 4028 if (void_procs) { 4029 csin_ds = PETSC_TRUE; 4030 ncoarse_ds = void_procs; 4031 csin_type_simple = PETSC_FALSE; 4032 } else { 4033 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4034 csin_ds = PETSC_TRUE; 4035 ncoarse_ds = pcbddc->redistribute_coarse; 4036 redist = PETSC_TRUE; 4037 } else { 4038 csin_ds = PETSC_FALSE; 4039 ncoarse_ds = all_procs; 4040 } 4041 } 4042 } 4043 4044 /* 4045 test if we can go multilevel: three conditions must be satisfied: 4046 - we have not exceeded the number of levels requested 4047 - we can actually subassemble the active processes 4048 - we can find a suitable number of MPI processes where we can place the subassembled problem 4049 */ 4050 multilevel_allowed = PETSC_FALSE; 4051 multilevel_requested = PETSC_FALSE; 4052 if (pcbddc->current_level < pcbddc->max_levels) { 4053 multilevel_requested = PETSC_TRUE; 4054 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4055 multilevel_allowed = PETSC_FALSE; 4056 } else { 4057 multilevel_allowed = PETSC_TRUE; 4058 } 4059 } 4060 /* determine number of process partecipating to coarse solver */ 4061 if (multilevel_allowed) { 4062 ncoarse = ncoarse_ml; 4063 csin = csin_ml; 4064 redist = PETSC_FALSE; 4065 } else { 4066 ncoarse = ncoarse_ds; 4067 csin = csin_ds; 4068 } 4069 4070 /* creates temporary l2gmap and IS for coarse indexes */ 4071 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4072 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4073 4074 /* creates temporary MATIS object for coarse matrix */ 4075 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4076 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4077 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4078 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4079 #if 0 4080 { 4081 PetscViewer viewer; 4082 char filename[256]; 4083 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4084 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4085 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4086 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4087 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 4088 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4089 } 4090 #endif 4091 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4092 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4093 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4094 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4095 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4096 4097 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4098 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4099 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4100 const PetscInt *idxs; 4101 ISLocalToGlobalMapping tmap; 4102 4103 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4104 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4105 /* allocate space for temporary storage */ 4106 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4107 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4108 /* allocate for IS array */ 4109 nisdofs = pcbddc->n_ISForDofsLocal; 4110 nisneu = !!pcbddc->NeumannBoundariesLocal; 4111 nis = nisdofs + nisneu; 4112 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4113 /* dofs splitting */ 4114 for (i=0;i<nisdofs;i++) { 4115 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4116 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4117 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4118 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4119 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4120 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4121 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4122 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4123 } 4124 /* neumann boundaries */ 4125 if (pcbddc->NeumannBoundariesLocal) { 4126 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4127 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4128 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4129 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4130 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4131 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4132 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4133 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4134 } 4135 /* free memory */ 4136 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4137 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4138 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4139 } else { 4140 nis = 0; 4141 nisdofs = 0; 4142 nisneu = 0; 4143 isarray = NULL; 4144 } 4145 /* destroy no longer needed map */ 4146 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4147 4148 /* restrict on coarse candidates (if needed) */ 4149 coarse_mat_is = NULL; 4150 if (csin) { 4151 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4152 if (redist) { 4153 PetscMPIInt rank; 4154 PetscInt spc,n_spc_p1,dest[1],destsize; 4155 4156 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4157 spc = active_procs/ncoarse; 4158 n_spc_p1 = active_procs%ncoarse; 4159 if (im_active) { 4160 destsize = 1; 4161 if (rank > n_spc_p1*(spc+1)-1) { 4162 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4163 } else { 4164 dest[0] = rank/(spc+1); 4165 } 4166 } else { 4167 destsize = 0; 4168 } 4169 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4170 } else if (csin_type_simple) { 4171 PetscMPIInt rank; 4172 PetscInt issize,isidx; 4173 4174 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4175 if (im_active) { 4176 issize = 1; 4177 isidx = (PetscInt)rank; 4178 } else { 4179 issize = 0; 4180 isidx = -1; 4181 } 4182 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4183 } else { /* get a suitable subassembling pattern from MATIS code */ 4184 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4185 } 4186 4187 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4188 if (!redist || ncoarse <= void_procs) { 4189 PetscInt ncoarse_cand,tissize,*nisindices; 4190 PetscInt *coarse_candidates; 4191 const PetscInt* tisindices; 4192 4193 /* get coarse candidates' ranks in pc communicator */ 4194 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4195 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4196 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4197 if (!coarse_candidates[i]) { 4198 coarse_candidates[ncoarse_cand++]=i; 4199 } 4200 } 4201 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4202 4203 4204 if (pcbddc->dbg_flag) { 4205 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4206 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4207 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4208 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4209 for (i=0;i<ncoarse_cand;i++) { 4210 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4211 } 4212 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4213 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4214 } 4215 /* shift the pattern on coarse candidates */ 4216 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4217 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4218 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4219 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4220 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4221 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4222 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4223 } 4224 if (pcbddc->dbg_flag) { 4225 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4226 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4227 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4228 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4229 } 4230 } 4231 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4232 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4233 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4234 } else { /* this is the last level, so use just receiving processes in subcomm */ 4235 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4236 } 4237 } else { 4238 if (pcbddc->dbg_flag) { 4239 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4240 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4241 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4242 } 4243 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4244 coarse_mat_is = t_coarse_mat_is; 4245 } 4246 4247 /* create local to global scatters for coarse problem */ 4248 if (compute_vecs) { 4249 PetscInt lrows; 4250 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4251 if (coarse_mat_is) { 4252 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4253 } else { 4254 lrows = 0; 4255 } 4256 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4257 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4258 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4259 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4260 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4261 } 4262 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4263 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4264 4265 /* set defaults for coarse KSP and PC */ 4266 if (multilevel_allowed) { 4267 coarse_ksp_type = KSPRICHARDSON; 4268 coarse_pc_type = PCBDDC; 4269 } else { 4270 coarse_ksp_type = KSPPREONLY; 4271 coarse_pc_type = PCREDUNDANT; 4272 } 4273 4274 /* print some info if requested */ 4275 if (pcbddc->dbg_flag) { 4276 if (!multilevel_allowed) { 4277 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4278 if (multilevel_requested) { 4279 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4280 } else if (pcbddc->max_levels) { 4281 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4282 } 4283 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4284 } 4285 } 4286 4287 /* create the coarse KSP object only once with defaults */ 4288 if (coarse_mat_is) { 4289 MatReuse coarse_mat_reuse; 4290 PetscViewer dbg_viewer = NULL; 4291 if (pcbddc->dbg_flag) { 4292 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4293 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4294 } 4295 if (!pcbddc->coarse_ksp) { 4296 char prefix[256],str_level[16]; 4297 size_t len; 4298 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4299 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4300 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4301 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4302 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4303 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4304 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4305 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4306 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4307 /* prefix */ 4308 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4309 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4310 if (!pcbddc->current_level) { 4311 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4312 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4313 } else { 4314 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4315 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4316 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4317 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4318 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4319 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4320 } 4321 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4322 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4323 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4324 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4325 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4326 /* allow user customization */ 4327 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4328 } 4329 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4330 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4331 if (nisdofs) { 4332 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4333 for (i=0;i<nisdofs;i++) { 4334 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4335 } 4336 } 4337 if (nisneu) { 4338 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4339 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4340 } 4341 4342 /* get some info after set from options */ 4343 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4344 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4345 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4346 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4347 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4348 isbddc = PETSC_FALSE; 4349 } 4350 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4351 if (isredundant) { 4352 KSP inner_ksp; 4353 PC inner_pc; 4354 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4355 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4356 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4357 } 4358 4359 /* assemble coarse matrix */ 4360 if (coarse_reuse) { 4361 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4362 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4363 coarse_mat_reuse = MAT_REUSE_MATRIX; 4364 } else { 4365 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4366 } 4367 if (isbddc || isnn) { 4368 if (pcbddc->coarsening_ratio > 1) { 4369 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4370 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4371 if (pcbddc->dbg_flag) { 4372 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4373 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4374 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4375 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4376 } 4377 } 4378 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4379 } else { 4380 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4381 coarse_mat = coarse_mat_is; 4382 } 4383 } else { 4384 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4385 } 4386 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4387 4388 /* propagate symmetry info of coarse matrix */ 4389 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4390 if (pc->pmat->symmetric_set) { 4391 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4392 } 4393 if (pc->pmat->hermitian_set) { 4394 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4395 } 4396 if (pc->pmat->spd_set) { 4397 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4398 } 4399 /* set operators */ 4400 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4401 if (pcbddc->dbg_flag) { 4402 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4403 } 4404 } else { /* processes non partecipating to coarse solver (if any) */ 4405 coarse_mat = 0; 4406 } 4407 ierr = PetscFree(isarray);CHKERRQ(ierr); 4408 #if 0 4409 { 4410 PetscViewer viewer; 4411 char filename[256]; 4412 sprintf(filename,"coarse_mat.m"); 4413 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4414 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4415 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4416 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 4417 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4418 } 4419 #endif 4420 4421 if (pcbddc->coarse_ksp) { 4422 Vec crhs,csol; 4423 PetscBool ispreonly; 4424 4425 if (CoarseNullSpace) { 4426 if (isbddc) { 4427 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4428 } else { 4429 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4430 } 4431 } 4432 /* setup coarse ksp */ 4433 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4434 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4435 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4436 /* hack */ 4437 if (!csol) { 4438 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4439 } 4440 if (!crhs) { 4441 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4442 } 4443 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4444 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4445 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4446 KSP check_ksp; 4447 KSPType check_ksp_type; 4448 PC check_pc; 4449 Vec check_vec,coarse_vec; 4450 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4451 PetscInt its; 4452 PetscBool compute_eigs; 4453 PetscReal *eigs_r,*eigs_c; 4454 PetscInt neigs; 4455 const char *prefix; 4456 4457 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4458 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4459 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4460 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4461 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4462 if (ispreonly) { 4463 check_ksp_type = KSPPREONLY; 4464 compute_eigs = PETSC_FALSE; 4465 } else { 4466 check_ksp_type = KSPGMRES; 4467 compute_eigs = PETSC_TRUE; 4468 } 4469 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4470 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4471 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4472 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4473 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4474 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4475 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4476 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4477 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4478 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4479 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4480 /* create random vec */ 4481 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4482 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4483 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4484 if (CoarseNullSpace) { 4485 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4486 } 4487 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4488 /* solve coarse problem */ 4489 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4490 if (CoarseNullSpace) { 4491 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4492 } 4493 /* set eigenvalue estimation if preonly has not been requested */ 4494 if (compute_eigs) { 4495 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4496 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4497 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4498 lambda_max = eigs_r[neigs-1]; 4499 lambda_min = eigs_r[0]; 4500 if (pcbddc->use_coarse_estimates) { 4501 if (lambda_max>lambda_min) { 4502 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4503 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4504 } 4505 } 4506 } 4507 4508 /* check coarse problem residual error */ 4509 if (pcbddc->dbg_flag) { 4510 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4511 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4512 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4513 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4514 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4515 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4516 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4517 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4518 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4519 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4520 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4521 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4522 if (compute_eigs) { 4523 PetscReal lambda_max_s,lambda_min_s; 4524 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4525 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4526 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4527 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4528 for (i=0;i<neigs;i++) { 4529 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4530 } 4531 } 4532 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4533 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4534 } 4535 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4536 if (compute_eigs) { 4537 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4538 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4539 } 4540 } 4541 } 4542 /* print additional info */ 4543 if (pcbddc->dbg_flag) { 4544 /* waits until all processes reaches this point */ 4545 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4546 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4547 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4548 } 4549 4550 /* free memory */ 4551 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4552 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4553 PetscFunctionReturn(0); 4554 } 4555 4556 #undef __FUNCT__ 4557 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4558 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4559 { 4560 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4561 PC_IS* pcis = (PC_IS*)pc->data; 4562 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4563 IS subset,subset_mult,subset_n; 4564 PetscInt local_size,coarse_size=0; 4565 PetscInt *local_primal_indices=NULL; 4566 const PetscInt *t_local_primal_indices; 4567 PetscErrorCode ierr; 4568 4569 PetscFunctionBegin; 4570 /* Compute global number of coarse dofs */ 4571 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4572 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4573 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4574 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4575 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4576 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4577 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4578 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4579 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4580 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 4581 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4582 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4583 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4584 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4585 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4586 4587 /* check numbering */ 4588 if (pcbddc->dbg_flag) { 4589 PetscScalar coarsesum,*array; 4590 PetscInt i; 4591 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4592 4593 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4596 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4597 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4598 for (i=0;i<pcbddc->local_primal_size;i++) { 4599 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4600 } 4601 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4602 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4603 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4604 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4605 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4606 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4607 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4608 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4609 for (i=0;i<pcis->n;i++) { 4610 if (array[i] == 1.0) { 4611 set_error = PETSC_TRUE; 4612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4613 } 4614 } 4615 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4616 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4617 for (i=0;i<pcis->n;i++) { 4618 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4619 } 4620 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4621 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4622 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4623 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4624 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4625 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4626 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4627 PetscInt *gidxs; 4628 4629 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4630 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4631 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4632 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4634 for (i=0;i<pcbddc->local_primal_size;i++) { 4635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4636 } 4637 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4638 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4639 } 4640 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4641 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4642 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4643 } 4644 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4645 /* get back data */ 4646 *coarse_size_n = coarse_size; 4647 *local_primal_indices_n = local_primal_indices; 4648 PetscFunctionReturn(0); 4649 } 4650 4651 #undef __FUNCT__ 4652 #define __FUNCT__ "PCBDDCGlobalToLocal" 4653 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4654 { 4655 IS localis_t; 4656 PetscInt i,lsize,*idxs,n; 4657 PetscScalar *vals; 4658 PetscErrorCode ierr; 4659 4660 PetscFunctionBegin; 4661 /* get indices in local ordering exploiting local to global map */ 4662 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4663 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4664 for (i=0;i<lsize;i++) vals[i] = 1.0; 4665 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4666 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4667 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4668 if (idxs) { /* multilevel guard */ 4669 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4670 } 4671 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4672 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4673 ierr = PetscFree(vals);CHKERRQ(ierr); 4674 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4675 /* now compute set in local ordering */ 4676 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4677 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4678 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4679 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4680 for (i=0,lsize=0;i<n;i++) { 4681 if (PetscRealPart(vals[i]) > 0.5) { 4682 lsize++; 4683 } 4684 } 4685 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4686 for (i=0,lsize=0;i<n;i++) { 4687 if (PetscRealPart(vals[i]) > 0.5) { 4688 idxs[lsize++] = i; 4689 } 4690 } 4691 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4692 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4693 *localis = localis_t; 4694 PetscFunctionReturn(0); 4695 } 4696 4697 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4698 #undef __FUNCT__ 4699 #define __FUNCT__ "PCBDDCMatMult_Private" 4700 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4701 { 4702 PCBDDCChange_ctx change_ctx; 4703 PetscErrorCode ierr; 4704 4705 PetscFunctionBegin; 4706 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4707 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4708 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4709 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4710 PetscFunctionReturn(0); 4711 } 4712 4713 #undef __FUNCT__ 4714 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4715 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4716 { 4717 PCBDDCChange_ctx change_ctx; 4718 PetscErrorCode ierr; 4719 4720 PetscFunctionBegin; 4721 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4722 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4723 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4724 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4725 PetscFunctionReturn(0); 4726 } 4727 4728 #undef __FUNCT__ 4729 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4730 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4731 { 4732 PC_IS *pcis=(PC_IS*)pc->data; 4733 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4734 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4735 Mat S_j; 4736 PetscInt *used_xadj,*used_adjncy; 4737 PetscBool free_used_adj; 4738 PetscErrorCode ierr; 4739 4740 PetscFunctionBegin; 4741 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4742 free_used_adj = PETSC_FALSE; 4743 if (pcbddc->sub_schurs_layers == -1) { 4744 used_xadj = NULL; 4745 used_adjncy = NULL; 4746 } else { 4747 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4748 used_xadj = pcbddc->mat_graph->xadj; 4749 used_adjncy = pcbddc->mat_graph->adjncy; 4750 } else if (pcbddc->computed_rowadj) { 4751 used_xadj = pcbddc->mat_graph->xadj; 4752 used_adjncy = pcbddc->mat_graph->adjncy; 4753 } else { 4754 PetscBool flg_row=PETSC_FALSE; 4755 const PetscInt *xadj,*adjncy; 4756 PetscInt nvtxs; 4757 4758 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4759 if (flg_row) { 4760 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4761 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4762 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4763 free_used_adj = PETSC_TRUE; 4764 } else { 4765 pcbddc->sub_schurs_layers = -1; 4766 used_xadj = NULL; 4767 used_adjncy = NULL; 4768 } 4769 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4770 } 4771 } 4772 4773 /* setup sub_schurs data */ 4774 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4775 if (!sub_schurs->use_mumps) { 4776 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4777 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4778 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4779 } else { 4780 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4781 PetscBool isseqaij; 4782 if (!pcbddc->use_vertices && reuse_solvers) { 4783 PetscInt n_vertices; 4784 4785 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4786 reuse_solvers = (PetscBool)!n_vertices; 4787 } 4788 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4789 if (!isseqaij) { 4790 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4791 if (matis->A == pcbddc->local_mat) { 4792 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4793 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4794 } else { 4795 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4796 } 4797 } 4798 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4799 } 4800 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4801 4802 /* free adjacency */ 4803 if (free_used_adj) { 4804 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4805 } 4806 PetscFunctionReturn(0); 4807 } 4808 4809 #undef __FUNCT__ 4810 #define __FUNCT__ "PCBDDCInitSubSchurs" 4811 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4812 { 4813 PC_IS *pcis=(PC_IS*)pc->data; 4814 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4815 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4816 PCBDDCGraph graph; 4817 PetscErrorCode ierr; 4818 4819 PetscFunctionBegin; 4820 /* attach interface graph for determining subsets */ 4821 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4822 IS verticesIS,verticescomm; 4823 PetscInt vsize,*idxs; 4824 4825 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4826 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4827 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4828 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4829 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4830 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4831 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4832 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4833 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4834 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4835 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4836 /* 4837 if (pcbddc->dbg_flag) { 4838 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4839 } 4840 */ 4841 } else { 4842 graph = pcbddc->mat_graph; 4843 } 4844 4845 /* sub_schurs init */ 4846 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4847 4848 /* free graph struct */ 4849 if (pcbddc->sub_schurs_rebuild) { 4850 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4851 } 4852 PetscFunctionReturn(0); 4853 } 4854