1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 29 30 if (pcbddc->dbg_flag) { 31 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 32 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 33 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 35 } 36 37 if (pcbddc->dbg_flag) { 38 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 39 } 40 41 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 42 43 /* max size of subsets */ 44 mss = 0; 45 for (i=0;i<sub_schurs->n_subs;i++) { 46 PetscInt subset_size; 47 48 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 49 mss = PetscMax(mss,subset_size); 50 } 51 52 /* min/max and threshold */ 53 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 54 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 55 nmax = PetscMax(nmin,nmax); 56 allocated_S_St = PETSC_FALSE; 57 if (nmin) { 58 allocated_S_St = PETSC_TRUE; 59 } 60 61 /* allocate lapack workspace */ 62 cum = cum2 = 0; 63 maxneigs = 0; 64 for (i=0;i<sub_schurs->n_subs;i++) { 65 PetscInt n,subset_size; 66 67 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 68 n = PetscMin(subset_size,nmax); 69 cum += subset_size; 70 cum2 += subset_size*n; 71 maxneigs = PetscMax(maxneigs,n); 72 } 73 if (mss) { 74 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 75 PetscBLASInt B_itype = 1; 76 PetscBLASInt B_N = mss; 77 PetscReal zero = 0.0; 78 PetscReal eps = 0.0; /* dlamch? */ 79 80 B_lwork = -1; 81 S = NULL; 82 St = NULL; 83 eigs = NULL; 84 eigv = NULL; 85 B_iwork = NULL; 86 B_ifail = NULL; 87 #if defined(PETSC_USE_COMPLEX) 88 rwork = NULL; 89 #endif 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 108 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 if (allocated_S_St) { 112 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 113 } 114 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 115 #if defined(PETSC_USE_COMPLEX) 116 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 117 #endif 118 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 119 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 121 nv+cum,&pcbddc->adaptive_constraints_idxs, 122 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 123 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 124 125 maxneigs = 0; 126 cum = cum2 = cumarray = 0; 127 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 128 pcbddc->adaptive_constraints_data_ptr[0] = 0; 129 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 130 const PetscInt *idxs; 131 132 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 133 for (cum=0;cum<nv;cum++) { 134 pcbddc->adaptive_constraints_n[cum] = 1; 135 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 136 pcbddc->adaptive_constraints_data[cum] = 1.0; 137 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 138 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 139 } 140 cum2 = cum; 141 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 142 } 143 144 if (mss) { /* multilevel */ 145 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 147 } 148 149 for (i=0;i<sub_schurs->n_subs;i++) { 150 151 const PetscInt *idxs; 152 PetscReal infty = PETSC_MAX_REAL; 153 PetscInt j,subset_size,eigs_start = 0; 154 PetscBLASInt B_N; 155 PetscBool same_data = PETSC_FALSE; 156 157 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 158 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 159 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 160 if (sub_schurs->is_hermitian) { 161 PetscInt j,k; 162 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 163 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 } 166 for (j=0;j<subset_size;j++) { 167 for (k=j;k<subset_size;k++) { 168 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 169 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 170 } 171 } 172 } else { 173 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 174 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 } 176 } else { 177 S = Sarray + cumarray; 178 St = Starray + cumarray; 179 } 180 181 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 182 /* see if we can save some work */ 183 if (sub_schurs->n_subs == 1) { 184 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 185 } 186 187 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 188 B_neigs = 0; 189 } else { 190 /* Threshold: this is an heuristic for edges */ 191 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 192 193 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 194 PetscBLASInt B_itype = 1; 195 PetscBLASInt B_IL, B_IU; 196 PetscReal eps = -1.0; /* dlamch? */ 197 PetscInt nmin_s; 198 199 /* ask for eigenvalues larger than thresh */ 200 if (pcbddc->dbg_flag) { 201 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 202 } 203 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 204 #if defined(PETSC_USE_COMPLEX) 205 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 206 #else 207 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 208 #endif 209 ierr = PetscFPTrapPop();CHKERRQ(ierr); 210 if (B_ierr) { 211 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 212 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 213 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 214 } 215 216 if (B_neigs > nmax) { 217 if (pcbddc->dbg_flag) { 218 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 219 } 220 eigs_start = B_neigs -nmax; 221 B_neigs = nmax; 222 } 223 224 nmin_s = PetscMin(nmin,B_N); 225 if (B_neigs < nmin_s) { 226 PetscBLASInt B_neigs2; 227 228 B_IU = B_N - B_neigs; 229 B_IL = B_N - nmin_s + 1; 230 if (pcbddc->dbg_flag) { 231 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 232 } 233 if (sub_schurs->is_hermitian) { 234 PetscInt j; 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 for (j=0;j<subset_size;j++) { 239 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 240 } 241 } else { 242 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 244 } 245 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 246 #if defined(PETSC_USE_COMPLEX) 247 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 248 #else 249 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 250 #endif 251 ierr = PetscFPTrapPop();CHKERRQ(ierr); 252 B_neigs += B_neigs2; 253 } 254 if (B_ierr) { 255 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 256 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 257 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 if (pcbddc->dbg_flag) { 260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 261 for (j=0;j<B_neigs;j++) { 262 if (eigs[j] == 0.0) { 263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 264 } else { 265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 266 } 267 } 268 } 269 } else { 270 /* TODO */ 271 } 272 } 273 maxneigs = PetscMax(B_neigs,maxneigs); 274 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 275 if (B_neigs) { 276 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 277 278 if (pcbddc->dbg_flag > 1) { 279 PetscInt ii; 280 for (ii=0;ii<B_neigs;ii++) { 281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 282 for (j=0;j<B_N;j++) { 283 #if defined(PETSC_USE_COMPLEX) 284 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 287 #else 288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 289 #endif 290 } 291 } 292 } 293 #if 0 294 for (j=0;j<B_neigs;j++) { 295 PetscBLASInt Blas_N,Blas_one = 1.0; 296 PetscScalar norm; 297 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 298 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 299 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 300 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 301 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 302 } else { 303 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 304 } 305 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 306 } 307 #endif 308 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 309 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 310 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 311 cum++; 312 } 313 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 314 /* shift for next computation */ 315 cumarray += subset_size*subset_size; 316 } 317 if (pcbddc->dbg_flag) { 318 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 319 } 320 321 if (mss) { 322 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 323 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 324 /* destroy matrices (junk) */ 325 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 326 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 327 } 328 if (allocated_S_St) { 329 ierr = PetscFree2(S,St);CHKERRQ(ierr); 330 } 331 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 332 #if defined(PETSC_USE_COMPLEX) 333 ierr = PetscFree(rwork);CHKERRQ(ierr); 334 #endif 335 if (pcbddc->dbg_flag) { 336 PetscInt maxneigs_r; 337 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 338 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 339 } 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "PCBDDCSetUpSolvers" 345 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 346 { 347 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 348 PetscScalar *coarse_submat_vals; 349 PetscErrorCode ierr; 350 351 PetscFunctionBegin; 352 /* Setup local scatters R_to_B and (optionally) R_to_D */ 353 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 354 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 355 356 /* Setup local neumann solver ksp_R */ 357 /* PCBDDCSetUpLocalScatters should be called first! */ 358 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 359 360 /* Change global null space passed in by the user if change of basis has been requested */ 361 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 362 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 363 } 364 365 /* 366 Setup local correction and local part of coarse basis. 367 Gives back the dense local part of the coarse matrix in column major ordering 368 */ 369 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 370 371 /* Compute total number of coarse nodes and setup coarse solver */ 372 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 373 374 /* free */ 375 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 376 PetscFunctionReturn(0); 377 } 378 379 #undef __FUNCT__ 380 #define __FUNCT__ "PCBDDCResetCustomization" 381 PetscErrorCode PCBDDCResetCustomization(PC pc) 382 { 383 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 384 PetscErrorCode ierr; 385 386 PetscFunctionBegin; 387 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 390 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 392 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 393 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 394 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 395 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 396 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 397 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 #undef __FUNCT__ 402 #define __FUNCT__ "PCBDDCResetTopography" 403 PetscErrorCode PCBDDCResetTopography(PC pc) 404 { 405 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 406 PetscErrorCode ierr; 407 408 PetscFunctionBegin; 409 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 411 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 412 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 413 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "PCBDDCResetSolvers" 419 PetscErrorCode PCBDDCResetSolvers(PC pc) 420 { 421 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 422 PetscScalar *array; 423 PetscErrorCode ierr; 424 425 PetscFunctionBegin; 426 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 427 if (pcbddc->coarse_phi_B) { 428 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 429 ierr = PetscFree(array);CHKERRQ(ierr); 430 } 431 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 432 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 437 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 438 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 440 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 441 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 442 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 443 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 444 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 445 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 446 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 447 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 448 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 449 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 450 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 451 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 452 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 459 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 460 { 461 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 462 PC_IS *pcis = (PC_IS*)pc->data; 463 VecType impVecType; 464 PetscInt n_constraints,n_R,old_size; 465 PetscErrorCode ierr; 466 467 PetscFunctionBegin; 468 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 469 /* get sizes */ 470 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 471 n_R = pcis->n-pcbddc->n_vertices; 472 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 473 /* local work vectors (try to avoid unneeded work)*/ 474 /* R nodes */ 475 old_size = -1; 476 if (pcbddc->vec1_R) { 477 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 478 } 479 if (n_R != old_size) { 480 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 482 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 483 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 484 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 485 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 486 } 487 /* local primal dofs */ 488 old_size = -1; 489 if (pcbddc->vec1_P) { 490 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 491 } 492 if (pcbddc->local_primal_size != old_size) { 493 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 494 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 495 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 496 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 497 } 498 /* local explicit constraints */ 499 old_size = -1; 500 if (pcbddc->vec1_C) { 501 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 502 } 503 if (n_constraints && n_constraints != old_size) { 504 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 505 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 506 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 507 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 508 } 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "PCBDDCSetUpCorrection" 514 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 515 { 516 PetscErrorCode ierr; 517 /* pointers to pcis and pcbddc */ 518 PC_IS* pcis = (PC_IS*)pc->data; 519 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 520 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 521 /* submatrices of local problem */ 522 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 523 /* submatrices of local coarse problem */ 524 Mat S_VV,S_CV,S_VC,S_CC; 525 /* working matrices */ 526 Mat C_CR; 527 /* additional working stuff */ 528 PC pc_R; 529 Mat F; 530 PetscBool isLU,isCHOL,isILU; 531 532 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 533 PetscScalar *work; 534 PetscInt *idx_V_B; 535 PetscInt n,n_vertices,n_constraints; 536 PetscInt i,n_R,n_D,n_B; 537 PetscBool unsymmetric_check; 538 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 539 MatType impMatType; 540 /* some shortcuts to scalars */ 541 PetscScalar one=1.0,m_one=-1.0; 542 543 PetscFunctionBegin; 544 n_vertices = pcbddc->n_vertices; 545 n_constraints = pcbddc->local_primal_size-n_vertices; 546 /* Set Non-overlapping dimensions */ 547 n_B = pcis->n_B; 548 n_D = pcis->n - n_B; 549 n_R = pcis->n - n_vertices; 550 551 /* Set types for local objects needed by BDDC precondtioner */ 552 impMatType = MATSEQDENSE; 553 554 /* vertices in boundary numbering */ 555 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 556 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 557 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 558 559 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 560 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 568 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 569 570 unsymmetric_check = PETSC_FALSE; 571 /* allocate workspace */ 572 n = 0; 573 if (n_constraints) { 574 n += n_R*n_constraints; 575 } 576 if (n_vertices) { 577 n = PetscMax(2*n_R*n_vertices,n); 578 n = PetscMax((n_R+n_B)*n_vertices,n); 579 } 580 if (!pcbddc->symmetric_primal) { 581 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 582 unsymmetric_check = PETSC_TRUE; 583 } 584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 585 586 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 587 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 591 if (isLU || isILU || isCHOL) { 592 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 593 } else if (sub_schurs->reuse_mumps) { 594 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 595 MatFactorType type; 596 597 F = reuse_mumps->F; 598 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 599 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 600 } else { 601 F = NULL; 602 } 603 604 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 605 if (n_constraints) { 606 Mat M1,M2,M3; 607 Mat auxmat; 608 IS is_aux; 609 PetscScalar *array,*array2; 610 611 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 614 /* Extract constraints on R nodes: C_{CR} */ 615 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 616 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 617 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 618 619 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 620 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 621 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 622 for (i=0;i<n_constraints;i++) { 623 const PetscScalar *row_cmat_values; 624 const PetscInt *row_cmat_indices; 625 PetscInt size_of_constraint,j; 626 627 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 628 for (j=0;j<size_of_constraint;j++) { 629 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 630 } 631 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 632 } 633 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 634 if (F) { 635 Mat B; 636 637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 638 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 639 ierr = MatDestroy(&B);CHKERRQ(ierr); 640 } else { 641 PetscScalar *marr; 642 643 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 652 } 653 if (!pcbddc->switch_static) { 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 655 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 656 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 660 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 661 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 662 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 663 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 664 } 665 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 666 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 667 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 668 } else { 669 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 670 pcbddc->local_auxmat2 = local_auxmat2_R; 671 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 672 } 673 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 674 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 675 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 677 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 678 if (isCHOL) { 679 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 680 } else { 681 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 682 } 683 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 684 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 685 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 686 ierr = MatDestroy(&M2);CHKERRQ(ierr); 687 ierr = MatDestroy(&M3);CHKERRQ(ierr); 688 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 689 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 690 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 691 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 692 ierr = MatDestroy(&M1);CHKERRQ(ierr); 693 } 694 /* Get submatrices from subdomain matrix */ 695 if (n_vertices) { 696 IS is_aux; 697 698 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 699 IS tis; 700 701 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 702 ierr = ISSort(tis);CHKERRQ(ierr); 703 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 704 ierr = ISDestroy(&tis);CHKERRQ(ierr); 705 } else { 706 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 707 } 708 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 709 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 710 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 711 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 712 } 713 714 /* Matrix of coarse basis functions (local) */ 715 if (pcbddc->coarse_phi_B) { 716 PetscInt on_B,on_primal,on_D=n_D; 717 if (pcbddc->coarse_phi_D) { 718 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 719 } 720 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 721 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 722 PetscScalar *marray; 723 724 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 725 ierr = PetscFree(marray);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 727 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 728 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 729 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 730 } 731 } 732 733 if (!pcbddc->coarse_phi_B) { 734 PetscScalar *marray; 735 736 n = n_B*pcbddc->local_primal_size; 737 if (pcbddc->switch_static || pcbddc->dbg_flag) { 738 n += n_D*pcbddc->local_primal_size; 739 } 740 if (!pcbddc->symmetric_primal) { 741 n *= 2; 742 } 743 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 745 n = n_B*pcbddc->local_primal_size; 746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 748 n += n_D*pcbddc->local_primal_size; 749 } 750 if (!pcbddc->symmetric_primal) { 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 752 if (pcbddc->switch_static || pcbddc->dbg_flag) { 753 n = n_B*pcbddc->local_primal_size; 754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 755 } 756 } else { 757 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 758 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 760 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 761 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 762 } 763 } 764 } 765 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 766 /* vertices */ 767 if (n_vertices) { 768 769 ierr = MatConvert(A_VV,impMatType,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 770 771 if (n_R) { 772 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 773 PetscBLASInt B_N,B_one = 1; 774 PetscScalar *x,*y; 775 PetscBool isseqaij; 776 777 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 778 ierr = MatConvert(A_RV,impMatType,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 780 if (F) { /* TODO could be optimized for symmetric problems */ 781 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 782 } else { 783 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 784 for (i=0;i<n_vertices;i++) { 785 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 786 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 787 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 788 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 789 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 790 } 791 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 792 } 793 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 794 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 795 if (n_constraints) { 796 Mat B; 797 798 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 799 for (i=0;i<n_vertices;i++) { 800 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 801 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 802 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 803 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 804 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 805 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 806 } 807 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 808 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 809 ierr = MatDestroy(&B);CHKERRQ(ierr); 810 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 811 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 812 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 813 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 814 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 815 ierr = MatDestroy(&B);CHKERRQ(ierr); 816 } 817 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 818 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 819 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 820 } 821 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 822 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 823 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 824 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 825 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 826 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 827 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 828 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 829 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 830 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 831 } else { 832 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 833 } 834 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 835 /* coarse basis functions */ 836 for (i=0;i<n_vertices;i++) { 837 PetscScalar *y; 838 839 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 840 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 841 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 842 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 y[n_B*i+idx_V_B[i]] = 1.0; 845 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 846 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 847 848 if (pcbddc->switch_static || pcbddc->dbg_flag) { 849 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 850 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 851 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 853 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 854 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 855 } 856 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 857 } 858 /* if n_R == 0 the object is not destroyed */ 859 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 860 } 861 862 if (n_constraints) { 863 Mat B; 864 865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 866 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 867 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 868 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 869 if (n_vertices) { 870 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 871 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 872 } else { 873 Mat S_VCt; 874 875 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 876 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 877 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 878 } 879 } 880 ierr = MatDestroy(&B);CHKERRQ(ierr); 881 /* coarse basis functions */ 882 for (i=0;i<n_constraints;i++) { 883 PetscScalar *y; 884 885 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 886 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 887 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 888 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 891 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 892 if (pcbddc->switch_static || pcbddc->dbg_flag) { 893 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 894 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 895 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 898 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 899 } 900 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 901 } 902 } 903 if (n_constraints) { 904 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 905 } 906 907 /* compute other basis functions for non-symmetric problems */ 908 if (!pcbddc->symmetric_primal) { 909 910 if (n_constraints) { 911 Mat S_CCT,B_C; 912 913 /* this is a lazy thing */ 914 ierr = MatConvert(C_CR,impMatType,MAT_INPLACE_MATRIX,&C_CR);CHKERRQ(ierr); 915 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 916 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 917 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 918 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 919 if (n_vertices) { 920 Mat B_V,S_VCT; 921 922 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 923 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 924 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 925 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 926 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 927 } 928 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 929 } else { /* if there are no constraints, reset work */ 930 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 931 } 932 if (n_vertices && n_R) { 933 Mat A_VRT; 934 PetscScalar *marray; 935 PetscBLASInt B_N,B_one = 1; 936 937 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 938 ierr = MatConvert(A_VRT,impMatType,MAT_INPLACE_MATRIX,&A_VRT);CHKERRQ(ierr); 939 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 940 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 941 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 942 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 943 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 944 } 945 946 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 947 for (i=0;i<pcbddc->local_primal_size;i++) { 948 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 949 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 950 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 952 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 953 } 954 } else { 955 for (i=0;i<pcbddc->local_primal_size;i++) { 956 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 957 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 958 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 959 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 960 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 961 } 962 } 963 /* coarse basis functions */ 964 for (i=0;i<pcbddc->local_primal_size;i++) { 965 PetscScalar *y; 966 967 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 968 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 969 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 970 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 971 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 if (i<n_vertices) { 973 y[n_B*i+idx_V_B[i]] = 1.0; 974 } 975 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 976 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 977 978 if (pcbddc->switch_static || pcbddc->dbg_flag) { 979 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 980 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 981 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 984 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 985 } 986 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 987 } 988 } 989 /* free memory */ 990 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 991 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 992 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 993 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 994 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 995 ierr = PetscFree(work);CHKERRQ(ierr); 996 if (n_vertices) { 997 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 998 } 999 if (n_constraints) { 1000 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1001 } 1002 /* Checking coarse_sub_mat and coarse basis functios */ 1003 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1004 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1005 if (pcbddc->dbg_flag) { 1006 Mat coarse_sub_mat; 1007 Mat AUXMAT,TM1,TM2,TM3,TM4; 1008 Mat coarse_phi_D,coarse_phi_B; 1009 Mat coarse_psi_D,coarse_psi_B; 1010 Mat A_II,A_BB,A_IB,A_BI; 1011 Mat C_B,CPHI; 1012 IS is_dummy; 1013 Vec mones; 1014 MatType checkmattype=MATSEQAIJ; 1015 PetscReal real_value; 1016 1017 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1018 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1019 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1020 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1021 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1022 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1023 if (unsymmetric_check) { 1024 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1025 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1026 } 1027 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1028 1029 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1030 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1032 if (unsymmetric_check) { 1033 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1034 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1035 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1036 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1037 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1038 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1039 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1040 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1041 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1042 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1043 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1044 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1045 } else { 1046 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1047 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1048 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1049 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1050 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1051 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1053 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1054 } 1055 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1056 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1057 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1058 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 1059 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1060 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1061 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1062 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1063 1064 /* check constraints */ 1065 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1066 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1067 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1068 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1069 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1070 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1071 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1073 if (unsymmetric_check) { 1074 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1075 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1076 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1077 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1079 } 1080 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1081 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1082 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1083 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1084 1085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1086 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1087 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1088 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1089 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1090 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1091 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1092 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1093 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1094 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1095 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1096 if (unsymmetric_check) { 1097 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1098 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1099 } 1100 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1101 } 1102 /* get back data */ 1103 *coarse_submat_vals_n = coarse_submat_vals; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1109 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1110 { 1111 Mat *work_mat; 1112 IS isrow_s,iscol_s; 1113 PetscBool rsorted,csorted; 1114 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1119 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1120 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1121 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1122 1123 if (!rsorted) { 1124 const PetscInt *idxs; 1125 PetscInt *idxs_sorted,i; 1126 1127 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1128 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1129 for (i=0;i<rsize;i++) { 1130 idxs_perm_r[i] = i; 1131 } 1132 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1133 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1134 for (i=0;i<rsize;i++) { 1135 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1136 } 1137 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1138 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1139 } else { 1140 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1141 isrow_s = isrow; 1142 } 1143 1144 if (!csorted) { 1145 if (isrow == iscol) { 1146 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1147 iscol_s = isrow_s; 1148 } else { 1149 const PetscInt *idxs; 1150 PetscInt *idxs_sorted,i; 1151 1152 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1153 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1154 for (i=0;i<csize;i++) { 1155 idxs_perm_c[i] = i; 1156 } 1157 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1158 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1159 for (i=0;i<csize;i++) { 1160 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1161 } 1162 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1163 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1164 } 1165 } else { 1166 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1167 iscol_s = iscol; 1168 } 1169 1170 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1171 1172 if (!rsorted || !csorted) { 1173 Mat new_mat; 1174 IS is_perm_r,is_perm_c; 1175 1176 if (!rsorted) { 1177 PetscInt *idxs_r,i; 1178 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1179 for (i=0;i<rsize;i++) { 1180 idxs_r[idxs_perm_r[i]] = i; 1181 } 1182 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1183 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1184 } else { 1185 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1186 } 1187 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1188 1189 if (!csorted) { 1190 if (isrow_s == iscol_s) { 1191 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1192 is_perm_c = is_perm_r; 1193 } else { 1194 PetscInt *idxs_c,i; 1195 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1196 for (i=0;i<csize;i++) { 1197 idxs_c[idxs_perm_c[i]] = i; 1198 } 1199 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1201 } 1202 } else { 1203 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1204 } 1205 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1206 1207 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1208 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1209 work_mat[0] = new_mat; 1210 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1211 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1212 } 1213 1214 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1215 *B = work_mat[0]; 1216 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1217 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1218 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1224 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1225 { 1226 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1227 PC_IS* pcis = (PC_IS*)pc->data; 1228 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1229 Mat new_mat; 1230 IS is_local,is_global; 1231 PetscInt local_size; 1232 PetscBool isseqaij; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1237 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1238 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1239 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1240 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1241 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1242 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1243 1244 /* check */ 1245 if (pcbddc->dbg_flag) { 1246 Vec x,x_change; 1247 PetscReal error; 1248 1249 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1250 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1251 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1252 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1254 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1255 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1257 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1258 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1259 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1260 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1261 ierr = VecDestroy(&x);CHKERRQ(ierr); 1262 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1263 } 1264 1265 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1266 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1267 if (isseqaij) { 1268 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1269 } else { 1270 Mat work_mat; 1271 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1272 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1273 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1274 } 1275 if (matis->A->symmetric_set) { 1276 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1277 #if !defined(PETSC_USE_COMPLEX) 1278 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1279 #endif 1280 } 1281 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1287 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1288 { 1289 PC_IS* pcis = (PC_IS*)(pc->data); 1290 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1291 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1292 PetscInt *idx_R_local=NULL; 1293 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1294 PetscInt vbs,bs; 1295 PetscBT bitmask=NULL; 1296 PetscErrorCode ierr; 1297 1298 PetscFunctionBegin; 1299 /* 1300 No need to setup local scatters if 1301 - primal space is unchanged 1302 AND 1303 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1304 AND 1305 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1306 */ 1307 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1308 PetscFunctionReturn(0); 1309 } 1310 /* destroy old objects */ 1311 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1312 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1313 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1314 /* Set Non-overlapping dimensions */ 1315 n_B = pcis->n_B; 1316 n_D = pcis->n - n_B; 1317 n_vertices = pcbddc->n_vertices; 1318 1319 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1320 1321 /* create auxiliary bitmask and allocate workspace */ 1322 if (!sub_schurs->reuse_mumps) { 1323 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1324 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1325 for (i=0;i<n_vertices;i++) { 1326 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1327 } 1328 1329 for (i=0, n_R=0; i<pcis->n; i++) { 1330 if (!PetscBTLookup(bitmask,i)) { 1331 idx_R_local[n_R++] = i; 1332 } 1333 } 1334 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1335 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1336 1337 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1338 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1339 } 1340 1341 /* Block code */ 1342 vbs = 1; 1343 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1344 if (bs>1 && !(n_vertices%bs)) { 1345 PetscBool is_blocked = PETSC_TRUE; 1346 PetscInt *vary; 1347 if (!sub_schurs->reuse_mumps) { 1348 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1349 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1350 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1351 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1352 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1353 for (i=0; i<pcis->n/bs; i++) { 1354 if (vary[i]!=0 && vary[i]!=bs) { 1355 is_blocked = PETSC_FALSE; 1356 break; 1357 } 1358 } 1359 ierr = PetscFree(vary);CHKERRQ(ierr); 1360 } else { 1361 /* Verify directly the R set */ 1362 for (i=0; i<n_R/bs; i++) { 1363 PetscInt j,node=idx_R_local[bs*i]; 1364 for (j=1; j<bs; j++) { 1365 if (node != idx_R_local[bs*i+j]-j) { 1366 is_blocked = PETSC_FALSE; 1367 break; 1368 } 1369 } 1370 } 1371 } 1372 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1373 vbs = bs; 1374 for (i=0;i<n_R/vbs;i++) { 1375 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1376 } 1377 } 1378 } 1379 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1380 if (sub_schurs->reuse_mumps) { 1381 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1382 1383 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1384 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1385 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1386 reuse_mumps->is_R = pcbddc->is_R_local; 1387 } else { 1388 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1389 } 1390 1391 /* print some info if requested */ 1392 if (pcbddc->dbg_flag) { 1393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1394 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1395 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1396 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1397 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1398 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1399 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1400 } 1401 1402 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1403 if (!sub_schurs->reuse_mumps) { 1404 IS is_aux1,is_aux2; 1405 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1406 1407 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1408 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1409 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1410 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1411 for (i=0; i<n_D; i++) { 1412 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1413 } 1414 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1415 for (i=0, j=0; i<n_R; i++) { 1416 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1417 aux_array1[j++] = i; 1418 } 1419 } 1420 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1421 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1422 for (i=0, j=0; i<n_B; i++) { 1423 if (!PetscBTLookup(bitmask,is_indices[i])) { 1424 aux_array2[j++] = i; 1425 } 1426 } 1427 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1428 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1429 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1430 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1431 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1432 1433 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1434 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1435 for (i=0, j=0; i<n_R; i++) { 1436 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1437 aux_array1[j++] = i; 1438 } 1439 } 1440 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1441 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1442 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1443 } 1444 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1445 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1446 } else { 1447 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1448 IS tis; 1449 PetscInt schur_size; 1450 1451 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1452 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1453 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1454 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1455 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1456 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1457 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1458 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1459 } 1460 } 1461 PetscFunctionReturn(0); 1462 } 1463 1464 1465 #undef __FUNCT__ 1466 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1467 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1468 { 1469 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1470 PC_IS *pcis = (PC_IS*)pc->data; 1471 PC pc_temp; 1472 Mat A_RR; 1473 MatReuse reuse; 1474 PetscScalar m_one = -1.0; 1475 PetscReal value; 1476 PetscInt n_D,n_R; 1477 PetscBool use_exact,use_exact_reduced,issbaij; 1478 PetscErrorCode ierr; 1479 /* prefixes stuff */ 1480 char dir_prefix[256],neu_prefix[256],str_level[16]; 1481 size_t len; 1482 1483 PetscFunctionBegin; 1484 1485 /* compute prefixes */ 1486 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1487 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1488 if (!pcbddc->current_level) { 1489 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1490 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1491 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1492 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1493 } else { 1494 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1495 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1496 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1497 len -= 15; /* remove "pc_bddc_coarse_" */ 1498 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1499 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1500 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1501 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1502 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1503 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1504 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1505 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1506 } 1507 1508 /* DIRICHLET PROBLEM */ 1509 if (dirichlet) { 1510 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1511 if (pcbddc->local_mat->symmetric_set) { 1512 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1513 } 1514 /* Matrix for Dirichlet problem is pcis->A_II */ 1515 n_D = pcis->n - pcis->n_B; 1516 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1517 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1518 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1519 /* default */ 1520 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1521 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1522 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1523 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1524 if (issbaij) { 1525 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1526 } else { 1527 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1528 } 1529 /* Allow user's customization */ 1530 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1531 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1532 } 1533 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1534 if (sub_schurs->reuse_mumps) { 1535 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1536 1537 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1538 } 1539 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1540 if (!n_D) { 1541 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1542 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1543 } 1544 /* Set Up KSP for Dirichlet problem of BDDC */ 1545 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1546 /* set ksp_D into pcis data */ 1547 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1548 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1549 pcis->ksp_D = pcbddc->ksp_D; 1550 } 1551 1552 /* NEUMANN PROBLEM */ 1553 A_RR = 0; 1554 if (neumann) { 1555 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1556 PetscInt ibs,mbs; 1557 PetscBool issbaij; 1558 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1559 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1560 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1561 if (pcbddc->ksp_R) { /* already created ksp */ 1562 PetscInt nn_R; 1563 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1564 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1565 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1566 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1567 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1568 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1569 reuse = MAT_INITIAL_MATRIX; 1570 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1571 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1572 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1573 reuse = MAT_INITIAL_MATRIX; 1574 } else { /* safe to reuse the matrix */ 1575 reuse = MAT_REUSE_MATRIX; 1576 } 1577 } 1578 /* last check */ 1579 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1580 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1581 reuse = MAT_INITIAL_MATRIX; 1582 } 1583 } else { /* first time, so we need to create the matrix */ 1584 reuse = MAT_INITIAL_MATRIX; 1585 } 1586 /* extract A_RR */ 1587 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1588 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1589 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1590 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1591 if (matis->A == pcbddc->local_mat) { 1592 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1593 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1594 } else { 1595 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1596 } 1597 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1598 if (matis->A == pcbddc->local_mat) { 1599 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1600 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1601 } else { 1602 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1603 } 1604 } 1605 if (!sub_schurs->reuse_mumps) { 1606 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1607 if (pcbddc->local_mat->symmetric_set) { 1608 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1609 } 1610 } else { 1611 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1612 1613 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1614 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1615 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1616 } 1617 if (!pcbddc->ksp_R) { /* create object if not present */ 1618 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1619 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1620 /* default */ 1621 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1622 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1623 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1624 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1625 if (issbaij) { 1626 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1627 } else { 1628 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1629 } 1630 /* Allow user's customization */ 1631 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1632 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1633 } 1634 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1635 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1636 if (!n_R) { 1637 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1638 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1639 } 1640 /* Reuse MUMPS solver if it is present */ 1641 if (sub_schurs->reuse_mumps) { 1642 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1643 1644 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1645 } 1646 /* Set Up KSP for Neumann problem of BDDC */ 1647 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1648 } 1649 /* free Neumann problem's matrix */ 1650 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1651 1652 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1653 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1654 if (pcbddc->dbg_flag) { 1655 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1656 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1658 } 1659 if (dirichlet) { /* Dirichlet */ 1660 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1661 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1662 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1663 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1664 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1665 /* need to be adapted? */ 1666 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1667 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1668 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1669 /* print info */ 1670 if (pcbddc->dbg_flag) { 1671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1673 } 1674 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1675 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1676 } 1677 } 1678 if (neumann) { /* Neumann */ 1679 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1680 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1681 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1682 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1683 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1684 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1685 /* need to be adapted? */ 1686 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1687 ierr = MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1688 /* print info */ 1689 if (pcbddc->dbg_flag) { 1690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1691 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1692 } 1693 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1694 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1695 } 1696 } 1697 } 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1703 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1704 { 1705 PetscErrorCode ierr; 1706 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1707 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1708 1709 PetscFunctionBegin; 1710 if (!sub_schurs->reuse_mumps) { 1711 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1712 } 1713 if (!pcbddc->switch_static) { 1714 if (applytranspose && pcbddc->local_auxmat1) { 1715 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1716 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1717 } 1718 if (!sub_schurs->reuse_mumps) { 1719 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1720 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1721 } else { 1722 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1723 1724 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1726 } 1727 } else { 1728 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1729 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1730 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1731 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1732 if (applytranspose && pcbddc->local_auxmat1) { 1733 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1734 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1735 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1736 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1737 } 1738 } 1739 if (!sub_schurs->reuse_mumps) { 1740 if (applytranspose) { 1741 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1742 } else { 1743 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1744 } 1745 } else { 1746 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1747 1748 if (applytranspose) { 1749 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1750 } else { 1751 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1752 } 1753 } 1754 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1755 if (!pcbddc->switch_static) { 1756 if (!sub_schurs->reuse_mumps) { 1757 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1758 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1759 } else { 1760 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1761 1762 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1763 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1764 } 1765 if (!applytranspose && pcbddc->local_auxmat1) { 1766 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1767 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1768 } 1769 } else { 1770 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1771 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1772 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1773 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1774 if (!applytranspose && pcbddc->local_auxmat1) { 1775 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1776 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1777 } 1778 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1781 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1782 } 1783 PetscFunctionReturn(0); 1784 } 1785 1786 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1789 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1790 { 1791 PetscErrorCode ierr; 1792 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1793 PC_IS* pcis = (PC_IS*) (pc->data); 1794 const PetscScalar zero = 0.0; 1795 1796 PetscFunctionBegin; 1797 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1798 if (applytranspose) { 1799 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1800 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1801 } else { 1802 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1803 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1804 } 1805 /* start communications from local primal nodes to rhs of coarse solver */ 1806 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1807 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1808 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1809 1810 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1811 /* TODO remove null space when doing multilevel */ 1812 if (pcbddc->coarse_ksp) { 1813 Vec rhs,sol; 1814 1815 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1816 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1817 if (applytranspose) { 1818 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1819 } else { 1820 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1821 } 1822 } 1823 1824 /* Local solution on R nodes */ 1825 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1826 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1827 } 1828 1829 /* communications from coarse sol to local primal nodes */ 1830 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1831 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1832 1833 /* Sum contributions from two levels */ 1834 if (applytranspose) { 1835 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1836 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1837 } else { 1838 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1839 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1840 } 1841 PetscFunctionReturn(0); 1842 } 1843 1844 #undef __FUNCT__ 1845 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1846 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1847 { 1848 PetscErrorCode ierr; 1849 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1850 PetscScalar *array; 1851 Vec from,to; 1852 1853 PetscFunctionBegin; 1854 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1855 from = pcbddc->coarse_vec; 1856 to = pcbddc->vec1_P; 1857 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1858 Vec tvec; 1859 1860 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1861 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1862 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1863 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1864 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1865 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1866 } 1867 } else { /* from local to global -> put data in coarse right hand side */ 1868 from = pcbddc->vec1_P; 1869 to = pcbddc->coarse_vec; 1870 } 1871 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1877 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1878 { 1879 PetscErrorCode ierr; 1880 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1881 PetscScalar *array; 1882 Vec from,to; 1883 1884 PetscFunctionBegin; 1885 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1886 from = pcbddc->coarse_vec; 1887 to = pcbddc->vec1_P; 1888 } else { /* from local to global -> put data in coarse right hand side */ 1889 from = pcbddc->vec1_P; 1890 to = pcbddc->coarse_vec; 1891 } 1892 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1893 if (smode == SCATTER_FORWARD) { 1894 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1895 Vec tvec; 1896 1897 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1898 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1899 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1900 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1901 } 1902 } else { 1903 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1904 ierr = VecResetArray(from);CHKERRQ(ierr); 1905 } 1906 } 1907 PetscFunctionReturn(0); 1908 } 1909 1910 /* uncomment for testing purposes */ 1911 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1914 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1915 { 1916 PetscErrorCode ierr; 1917 PC_IS* pcis = (PC_IS*)(pc->data); 1918 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1919 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1920 /* one and zero */ 1921 PetscScalar one=1.0,zero=0.0; 1922 /* space to store constraints and their local indices */ 1923 PetscScalar *constraints_data; 1924 PetscInt *constraints_idxs,*constraints_idxs_B; 1925 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1926 PetscInt *constraints_n; 1927 /* iterators */ 1928 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1929 /* BLAS integers */ 1930 PetscBLASInt lwork,lierr; 1931 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1932 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1933 /* reuse */ 1934 PetscInt olocal_primal_size,olocal_primal_size_cc; 1935 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1936 /* change of basis */ 1937 PetscBool qr_needed; 1938 PetscBT change_basis,qr_needed_idx; 1939 /* auxiliary stuff */ 1940 PetscInt *nnz,*is_indices; 1941 PetscInt ncc; 1942 /* some quantities */ 1943 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1944 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1945 1946 PetscFunctionBegin; 1947 /* Destroy Mat objects computed previously */ 1948 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1949 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1950 /* save info on constraints from previous setup (if any) */ 1951 olocal_primal_size = pcbddc->local_primal_size; 1952 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1953 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1954 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1955 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1956 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1957 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1958 1959 /* print some info */ 1960 if (pcbddc->dbg_flag) { 1961 IS vertices; 1962 PetscInt nv,nedges,nfaces; 1963 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1964 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1965 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1966 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1967 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1968 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1969 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1972 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1973 } 1974 1975 if (!pcbddc->adaptive_selection) { 1976 IS ISForVertices,*ISForFaces,*ISForEdges; 1977 MatNullSpace nearnullsp; 1978 const Vec *nearnullvecs; 1979 Vec *localnearnullsp; 1980 PetscScalar *array; 1981 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1982 PetscBool nnsp_has_cnst; 1983 /* LAPACK working arrays for SVD or POD */ 1984 PetscBool skip_lapack,boolforchange; 1985 PetscScalar *work; 1986 PetscReal *singular_vals; 1987 #if defined(PETSC_USE_COMPLEX) 1988 PetscReal *rwork; 1989 #endif 1990 #if defined(PETSC_MISSING_LAPACK_GESVD) 1991 PetscScalar *temp_basis,*correlation_mat; 1992 #else 1993 PetscBLASInt dummy_int=1; 1994 PetscScalar dummy_scalar=1.; 1995 #endif 1996 1997 /* Get index sets for faces, edges and vertices from graph */ 1998 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1999 /* free unneeded index sets */ 2000 if (!pcbddc->use_vertices) { 2001 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2002 } 2003 if (!pcbddc->use_edges) { 2004 for (i=0;i<n_ISForEdges;i++) { 2005 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2006 } 2007 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2008 n_ISForEdges = 0; 2009 } 2010 if (!pcbddc->use_faces) { 2011 for (i=0;i<n_ISForFaces;i++) { 2012 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2013 } 2014 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2015 n_ISForFaces = 0; 2016 } 2017 2018 #if defined(PETSC_USE_DEBUG) 2019 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2020 Also use_change_of_basis should be consistent among processors */ 2021 if (pcbddc->NullSpace) { 2022 PetscBool tbool[2],gbool[2]; 2023 2024 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2025 pcbddc->use_change_of_basis = PETSC_TRUE; 2026 if (!ISForEdges) { 2027 pcbddc->use_change_on_faces = PETSC_TRUE; 2028 } 2029 } 2030 tbool[0] = pcbddc->use_change_of_basis; 2031 tbool[1] = pcbddc->use_change_on_faces; 2032 ierr = MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2033 pcbddc->use_change_of_basis = gbool[0]; 2034 pcbddc->use_change_on_faces = gbool[1]; 2035 } 2036 #endif 2037 2038 /* check if near null space is attached to global mat */ 2039 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2040 if (nearnullsp) { 2041 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2042 /* remove any stored info */ 2043 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2044 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2045 /* store information for BDDC solver reuse */ 2046 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2047 pcbddc->onearnullspace = nearnullsp; 2048 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2049 for (i=0;i<nnsp_size;i++) { 2050 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2051 } 2052 } else { /* if near null space is not provided BDDC uses constants by default */ 2053 nnsp_size = 0; 2054 nnsp_has_cnst = PETSC_TRUE; 2055 } 2056 /* get max number of constraints on a single cc */ 2057 max_constraints = nnsp_size; 2058 if (nnsp_has_cnst) max_constraints++; 2059 2060 /* 2061 Evaluate maximum storage size needed by the procedure 2062 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2063 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2064 There can be multiple constraints per connected component 2065 */ 2066 n_vertices = 0; 2067 if (ISForVertices) { 2068 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2069 } 2070 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2071 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2072 2073 total_counts = n_ISForFaces+n_ISForEdges; 2074 total_counts *= max_constraints; 2075 total_counts += n_vertices; 2076 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2077 2078 total_counts = 0; 2079 max_size_of_constraint = 0; 2080 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2081 IS used_is; 2082 if (i<n_ISForEdges) { 2083 used_is = ISForEdges[i]; 2084 } else { 2085 used_is = ISForFaces[i-n_ISForEdges]; 2086 } 2087 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2088 total_counts += j; 2089 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2090 } 2091 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2092 2093 /* get local part of global near null space vectors */ 2094 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2095 for (k=0;k<nnsp_size;k++) { 2096 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2097 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2098 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2099 } 2100 2101 /* whether or not to skip lapack calls */ 2102 skip_lapack = PETSC_TRUE; 2103 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2104 2105 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2106 if (!skip_lapack) { 2107 PetscScalar temp_work; 2108 2109 #if defined(PETSC_MISSING_LAPACK_GESVD) 2110 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2111 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2112 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2113 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2114 #if defined(PETSC_USE_COMPLEX) 2115 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2116 #endif 2117 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2118 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2119 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2120 lwork = -1; 2121 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2122 #if !defined(PETSC_USE_COMPLEX) 2123 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2124 #else 2125 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2126 #endif 2127 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2128 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2129 #else /* on missing GESVD */ 2130 /* SVD */ 2131 PetscInt max_n,min_n; 2132 max_n = max_size_of_constraint; 2133 min_n = max_constraints; 2134 if (max_size_of_constraint < max_constraints) { 2135 min_n = max_size_of_constraint; 2136 max_n = max_constraints; 2137 } 2138 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2139 #if defined(PETSC_USE_COMPLEX) 2140 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2141 #endif 2142 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2143 lwork = -1; 2144 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2145 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2146 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2147 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2148 #if !defined(PETSC_USE_COMPLEX) 2149 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2150 #else 2151 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2152 #endif 2153 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2154 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2155 #endif /* on missing GESVD */ 2156 /* Allocate optimal workspace */ 2157 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2158 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2159 } 2160 /* Now we can loop on constraining sets */ 2161 total_counts = 0; 2162 constraints_idxs_ptr[0] = 0; 2163 constraints_data_ptr[0] = 0; 2164 /* vertices */ 2165 if (n_vertices) { 2166 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2167 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2168 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2169 for (i=0;i<n_vertices;i++) { 2170 constraints_n[total_counts] = 1; 2171 constraints_data[total_counts] = 1.0; 2172 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2173 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2174 total_counts++; 2175 } 2176 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2177 PetscBool used_vertex; 2178 for (i=0;i<n_vertices;i++) { 2179 used_vertex = PETSC_FALSE; 2180 k = 0; 2181 while (!used_vertex && k<nnsp_size) { 2182 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2183 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2184 constraints_n[total_counts] = 1; 2185 constraints_idxs[total_counts] = is_indices[i]; 2186 constraints_data[total_counts] = 1.0; 2187 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2188 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2189 total_counts++; 2190 used_vertex = PETSC_TRUE; 2191 } 2192 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2193 k++; 2194 } 2195 } 2196 } 2197 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2198 n_vertices = total_counts; 2199 } 2200 2201 /* edges and faces */ 2202 total_counts_cc = total_counts; 2203 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2204 IS used_is; 2205 PetscBool idxs_copied = PETSC_FALSE; 2206 2207 if (ncc<n_ISForEdges) { 2208 used_is = ISForEdges[ncc]; 2209 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2210 } else { 2211 used_is = ISForFaces[ncc-n_ISForEdges]; 2212 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2213 } 2214 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2215 2216 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2217 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2218 /* change of basis should not be performed on local periodic nodes */ 2219 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2220 if (nnsp_has_cnst) { 2221 PetscScalar quad_value; 2222 2223 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2224 idxs_copied = PETSC_TRUE; 2225 2226 if (!pcbddc->use_nnsp_true) { 2227 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2228 } else { 2229 quad_value = 1.0; 2230 } 2231 for (j=0;j<size_of_constraint;j++) { 2232 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2233 } 2234 temp_constraints++; 2235 total_counts++; 2236 } 2237 for (k=0;k<nnsp_size;k++) { 2238 PetscReal real_value; 2239 PetscScalar *ptr_to_data; 2240 2241 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2242 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2243 for (j=0;j<size_of_constraint;j++) { 2244 ptr_to_data[j] = array[is_indices[j]]; 2245 } 2246 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2247 /* check if array is null on the connected component */ 2248 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2249 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2250 if (real_value > 0.0) { /* keep indices and values */ 2251 temp_constraints++; 2252 total_counts++; 2253 if (!idxs_copied) { 2254 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2255 idxs_copied = PETSC_TRUE; 2256 } 2257 } 2258 } 2259 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2260 valid_constraints = temp_constraints; 2261 if (!pcbddc->use_nnsp_true && temp_constraints) { 2262 if (temp_constraints == 1) { /* just normalize the constraint */ 2263 PetscScalar norm,*ptr_to_data; 2264 2265 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2266 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2267 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2268 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2269 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2270 } else { /* perform SVD */ 2271 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2272 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2273 2274 #if defined(PETSC_MISSING_LAPACK_GESVD) 2275 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2276 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2277 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2278 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2279 from that computed using LAPACKgesvd 2280 -> This is due to a different computation of eigenvectors in LAPACKheev 2281 -> The quality of the POD-computed basis will be the same */ 2282 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2283 /* Store upper triangular part of correlation matrix */ 2284 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2285 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2286 for (j=0;j<temp_constraints;j++) { 2287 for (k=0;k<j+1;k++) { 2288 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2289 } 2290 } 2291 /* compute eigenvalues and eigenvectors of correlation matrix */ 2292 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2293 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2294 #if !defined(PETSC_USE_COMPLEX) 2295 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2296 #else 2297 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2298 #endif 2299 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2300 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2301 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2302 j = 0; 2303 while (j < temp_constraints && singular_vals[j] < tol) j++; 2304 total_counts = total_counts-j; 2305 valid_constraints = temp_constraints-j; 2306 /* scale and copy POD basis into used quadrature memory */ 2307 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2308 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2309 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2310 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2311 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2312 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2313 if (j<temp_constraints) { 2314 PetscInt ii; 2315 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2316 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2317 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2318 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2319 for (k=0;k<temp_constraints-j;k++) { 2320 for (ii=0;ii<size_of_constraint;ii++) { 2321 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2322 } 2323 } 2324 } 2325 #else /* on missing GESVD */ 2326 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2327 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2329 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2330 #if !defined(PETSC_USE_COMPLEX) 2331 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2332 #else 2333 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2334 #endif 2335 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2336 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2337 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2338 k = temp_constraints; 2339 if (k > size_of_constraint) k = size_of_constraint; 2340 j = 0; 2341 while (j < k && singular_vals[k-j-1] < tol) j++; 2342 valid_constraints = k-j; 2343 total_counts = total_counts-temp_constraints+valid_constraints; 2344 #endif /* on missing GESVD */ 2345 } 2346 } 2347 /* update pointers information */ 2348 if (valid_constraints) { 2349 constraints_n[total_counts_cc] = valid_constraints; 2350 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2351 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2352 /* set change_of_basis flag */ 2353 if (boolforchange) { 2354 PetscBTSet(change_basis,total_counts_cc); 2355 } 2356 total_counts_cc++; 2357 } 2358 } 2359 /* free workspace */ 2360 if (!skip_lapack) { 2361 ierr = PetscFree(work);CHKERRQ(ierr); 2362 #if defined(PETSC_USE_COMPLEX) 2363 ierr = PetscFree(rwork);CHKERRQ(ierr); 2364 #endif 2365 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2366 #if defined(PETSC_MISSING_LAPACK_GESVD) 2367 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2368 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2369 #endif 2370 } 2371 for (k=0;k<nnsp_size;k++) { 2372 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2373 } 2374 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2375 /* free index sets of faces, edges and vertices */ 2376 for (i=0;i<n_ISForFaces;i++) { 2377 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2378 } 2379 if (n_ISForFaces) { 2380 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2381 } 2382 for (i=0;i<n_ISForEdges;i++) { 2383 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2384 } 2385 if (n_ISForEdges) { 2386 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2387 } 2388 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2389 } else { 2390 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2391 2392 total_counts = 0; 2393 n_vertices = 0; 2394 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2395 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2396 } 2397 max_constraints = 0; 2398 total_counts_cc = 0; 2399 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2400 total_counts += pcbddc->adaptive_constraints_n[i]; 2401 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2402 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2403 } 2404 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2405 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2406 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2407 constraints_data = pcbddc->adaptive_constraints_data; 2408 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2409 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2410 total_counts_cc = 0; 2411 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2412 if (pcbddc->adaptive_constraints_n[i]) { 2413 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2414 } 2415 } 2416 #if 0 2417 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2418 for (i=0;i<total_counts_cc;i++) { 2419 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2420 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2421 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2422 printf(" %d",constraints_idxs[j]); 2423 } 2424 printf("\n"); 2425 printf("number of cc: %d\n",constraints_n[i]); 2426 } 2427 for (i=0;i<n_vertices;i++) { 2428 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2429 } 2430 for (i=0;i<sub_schurs->n_subs;i++) { 2431 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2432 } 2433 #endif 2434 2435 max_size_of_constraint = 0; 2436 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2437 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2438 /* Change of basis */ 2439 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2440 if (pcbddc->use_change_of_basis) { 2441 for (i=0;i<sub_schurs->n_subs;i++) { 2442 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2443 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2444 } 2445 } 2446 } 2447 } 2448 pcbddc->local_primal_size = total_counts; 2449 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2450 2451 /* map constraints_idxs in boundary numbering */ 2452 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2453 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 2454 2455 /* Create constraint matrix */ 2456 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2457 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2458 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2459 2460 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2461 /* determine if a QR strategy is needed for change of basis */ 2462 qr_needed = PETSC_FALSE; 2463 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2464 total_primal_vertices=0; 2465 pcbddc->local_primal_size_cc = 0; 2466 for (i=0;i<total_counts_cc;i++) { 2467 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2468 if (size_of_constraint == 1) { 2469 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2470 pcbddc->local_primal_size_cc += 1; 2471 } else if (PetscBTLookup(change_basis,i)) { 2472 for (k=0;k<constraints_n[i];k++) { 2473 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2474 } 2475 pcbddc->local_primal_size_cc += constraints_n[i]; 2476 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2477 PetscBTSet(qr_needed_idx,i); 2478 qr_needed = PETSC_TRUE; 2479 } 2480 } else { 2481 pcbddc->local_primal_size_cc += 1; 2482 } 2483 } 2484 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2485 pcbddc->n_vertices = total_primal_vertices; 2486 /* permute indices in order to have a sorted set of vertices */ 2487 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2488 2489 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2490 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2491 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2492 2493 /* nonzero structure of constraint matrix */ 2494 /* and get reference dof for local constraints */ 2495 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2496 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2497 2498 j = total_primal_vertices; 2499 total_counts = total_primal_vertices; 2500 cum = total_primal_vertices; 2501 for (i=n_vertices;i<total_counts_cc;i++) { 2502 if (!PetscBTLookup(change_basis,i)) { 2503 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2504 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2505 cum++; 2506 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2507 for (k=0;k<constraints_n[i];k++) { 2508 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2509 nnz[j+k] = size_of_constraint; 2510 } 2511 j += constraints_n[i]; 2512 } 2513 } 2514 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2515 ierr = PetscFree(nnz);CHKERRQ(ierr); 2516 2517 /* set values in constraint matrix */ 2518 for (i=0;i<total_primal_vertices;i++) { 2519 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2520 } 2521 total_counts = total_primal_vertices; 2522 for (i=n_vertices;i<total_counts_cc;i++) { 2523 if (!PetscBTLookup(change_basis,i)) { 2524 PetscInt *cols; 2525 2526 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2527 cols = constraints_idxs+constraints_idxs_ptr[i]; 2528 for (k=0;k<constraints_n[i];k++) { 2529 PetscInt row = total_counts+k; 2530 PetscScalar *vals; 2531 2532 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2533 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2534 } 2535 total_counts += constraints_n[i]; 2536 } 2537 } 2538 /* assembling */ 2539 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2540 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2541 2542 /* 2543 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2544 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2545 */ 2546 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2547 if (pcbddc->use_change_of_basis) { 2548 /* dual and primal dofs on a single cc */ 2549 PetscInt dual_dofs,primal_dofs; 2550 /* working stuff for GEQRF */ 2551 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2552 PetscBLASInt lqr_work; 2553 /* working stuff for UNGQR */ 2554 PetscScalar *gqr_work,lgqr_work_t; 2555 PetscBLASInt lgqr_work; 2556 /* working stuff for TRTRS */ 2557 PetscScalar *trs_rhs; 2558 PetscBLASInt Blas_NRHS; 2559 /* pointers for values insertion into change of basis matrix */ 2560 PetscInt *start_rows,*start_cols; 2561 PetscScalar *start_vals; 2562 /* working stuff for values insertion */ 2563 PetscBT is_primal; 2564 PetscInt *aux_primal_numbering_B; 2565 /* matrix sizes */ 2566 PetscInt global_size,local_size; 2567 /* temporary change of basis */ 2568 Mat localChangeOfBasisMatrix; 2569 /* extra space for debugging */ 2570 PetscScalar *dbg_work; 2571 2572 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2573 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2574 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2575 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2576 /* nonzeros for local mat */ 2577 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2578 for (i=0;i<pcis->n;i++) nnz[i]=1; 2579 for (i=n_vertices;i<total_counts_cc;i++) { 2580 if (PetscBTLookup(change_basis,i)) { 2581 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2582 if (PetscBTLookup(qr_needed_idx,i)) { 2583 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2584 } else { 2585 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2586 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2587 } 2588 } 2589 } 2590 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2591 ierr = PetscFree(nnz);CHKERRQ(ierr); 2592 /* Set initial identity in the matrix */ 2593 for (i=0;i<pcis->n;i++) { 2594 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2595 } 2596 2597 if (pcbddc->dbg_flag) { 2598 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2600 } 2601 2602 2603 /* Now we loop on the constraints which need a change of basis */ 2604 /* 2605 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2606 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2607 2608 Basic blocks of change of basis matrix T computed by 2609 2610 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2611 2612 | 1 0 ... 0 s_1/S | 2613 | 0 1 ... 0 s_2/S | 2614 | ... | 2615 | 0 ... 1 s_{n-1}/S | 2616 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2617 2618 with S = \sum_{i=1}^n s_i^2 2619 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2620 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2621 2622 - QR decomposition of constraints otherwise 2623 */ 2624 if (qr_needed) { 2625 /* space to store Q */ 2626 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2627 /* first we issue queries for optimal work */ 2628 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2629 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2630 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2631 lqr_work = -1; 2632 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2633 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2634 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2635 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2636 lgqr_work = -1; 2637 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2638 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2639 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2640 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2641 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2642 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2643 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2644 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2645 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2646 /* array to store scaling factors for reflectors */ 2647 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2648 /* array to store rhs and solution of triangular solver */ 2649 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2650 /* allocating workspace for check */ 2651 if (pcbddc->dbg_flag) { 2652 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2653 } 2654 } 2655 /* array to store whether a node is primal or not */ 2656 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2657 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2658 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2659 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 2660 for (i=0;i<total_primal_vertices;i++) { 2661 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2662 } 2663 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2664 2665 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2666 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2667 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2668 if (PetscBTLookup(change_basis,total_counts)) { 2669 /* get constraint info */ 2670 primal_dofs = constraints_n[total_counts]; 2671 dual_dofs = size_of_constraint-primal_dofs; 2672 2673 if (pcbddc->dbg_flag) { 2674 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2675 } 2676 2677 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2678 2679 /* copy quadrature constraints for change of basis check */ 2680 if (pcbddc->dbg_flag) { 2681 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2682 } 2683 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2684 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2685 2686 /* compute QR decomposition of constraints */ 2687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2688 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2689 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2690 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2691 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2692 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2693 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2694 2695 /* explictly compute R^-T */ 2696 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2697 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2698 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2699 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2700 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2701 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2702 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2703 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2704 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2705 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2706 2707 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2709 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2711 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2712 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2713 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2714 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2715 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2716 2717 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2718 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2719 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2720 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2721 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2726 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2727 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2728 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2729 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2730 2731 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2732 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2733 /* insert cols for primal dofs */ 2734 for (j=0;j<primal_dofs;j++) { 2735 start_vals = &qr_basis[j*size_of_constraint]; 2736 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2737 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2738 } 2739 /* insert cols for dual dofs */ 2740 for (j=0,k=0;j<dual_dofs;k++) { 2741 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2742 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2743 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2744 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2745 j++; 2746 } 2747 } 2748 2749 /* check change of basis */ 2750 if (pcbddc->dbg_flag) { 2751 PetscInt ii,jj; 2752 PetscBool valid_qr=PETSC_TRUE; 2753 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2754 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2755 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2756 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2757 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2758 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2759 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2760 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2761 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2762 for (jj=0;jj<size_of_constraint;jj++) { 2763 for (ii=0;ii<primal_dofs;ii++) { 2764 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2765 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2766 } 2767 } 2768 if (!valid_qr) { 2769 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2770 for (jj=0;jj<size_of_constraint;jj++) { 2771 for (ii=0;ii<primal_dofs;ii++) { 2772 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2773 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2774 } 2775 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2776 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2777 } 2778 } 2779 } 2780 } else { 2781 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2782 } 2783 } 2784 } else { /* simple transformation block */ 2785 PetscInt row,col; 2786 PetscScalar val,norm; 2787 2788 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2789 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2790 for (j=0;j<size_of_constraint;j++) { 2791 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2792 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2793 if (!PetscBTLookup(is_primal,row_B)) { 2794 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2795 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2796 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2797 } else { 2798 for (k=0;k<size_of_constraint;k++) { 2799 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2800 if (row != col) { 2801 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2802 } else { 2803 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2804 } 2805 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2806 } 2807 } 2808 } 2809 if (pcbddc->dbg_flag) { 2810 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2811 } 2812 } 2813 } else { 2814 if (pcbddc->dbg_flag) { 2815 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2816 } 2817 } 2818 } 2819 2820 /* free workspace */ 2821 if (qr_needed) { 2822 if (pcbddc->dbg_flag) { 2823 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2824 } 2825 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2826 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2827 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2828 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2829 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2830 } 2831 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2832 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2833 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2834 2835 /* assembling of global change of variable */ 2836 { 2837 Mat tmat; 2838 PetscInt bs; 2839 2840 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2841 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2842 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2843 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2844 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2845 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2846 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2847 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2848 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2849 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2850 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2851 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2852 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2853 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2854 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2855 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2856 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2857 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2858 } 2859 /* check */ 2860 if (pcbddc->dbg_flag) { 2861 PetscReal error; 2862 Vec x,x_change; 2863 2864 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2865 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2866 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2867 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2868 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2869 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2870 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2871 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2872 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2873 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2874 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2875 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2876 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2877 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2878 ierr = VecDestroy(&x);CHKERRQ(ierr); 2879 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2880 } 2881 2882 /* adapt sub_schurs computed (if any) */ 2883 if (pcbddc->use_deluxe_scaling) { 2884 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2885 if (sub_schurs->S_Ej_all) { 2886 Mat S_new,tmat; 2887 IS is_all_N; 2888 2889 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2890 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2891 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2892 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2893 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2894 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2895 sub_schurs->S_Ej_all = S_new; 2896 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2897 if (sub_schurs->sum_S_Ej_all) { 2898 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2899 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2900 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2901 sub_schurs->sum_S_Ej_all = S_new; 2902 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2903 } 2904 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2905 } 2906 } 2907 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2908 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2909 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2910 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2911 } 2912 2913 /* set up change of basis context */ 2914 if (pcbddc->ChangeOfBasisMatrix) { 2915 PCBDDCChange_ctx change_ctx; 2916 2917 if (!pcbddc->new_global_mat) { 2918 PetscInt global_size,local_size; 2919 2920 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2921 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2922 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2923 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2924 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2925 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2926 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2927 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2928 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2929 } else { 2930 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2931 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2932 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2933 } 2934 if (!pcbddc->user_ChangeOfBasisMatrix) { 2935 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2936 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2937 } else { 2938 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2939 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2940 } 2941 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2942 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2943 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2944 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2945 } 2946 2947 /* check if a new primal space has been introduced */ 2948 pcbddc->new_primal_space_local = PETSC_TRUE; 2949 if (olocal_primal_size == pcbddc->local_primal_size) { 2950 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2951 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2952 if (!pcbddc->new_primal_space_local) { 2953 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2954 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2955 } 2956 } 2957 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2958 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2959 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2960 2961 /* flush dbg viewer */ 2962 if (pcbddc->dbg_flag) { 2963 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2964 } 2965 2966 /* free workspace */ 2967 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2968 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2969 if (!pcbddc->adaptive_selection) { 2970 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2971 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2972 } else { 2973 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2974 pcbddc->adaptive_constraints_idxs_ptr, 2975 pcbddc->adaptive_constraints_data_ptr, 2976 pcbddc->adaptive_constraints_idxs, 2977 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2978 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2979 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2980 } 2981 PetscFunctionReturn(0); 2982 } 2983 2984 #undef __FUNCT__ 2985 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2986 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2987 { 2988 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2989 PC_IS *pcis = (PC_IS*)pc->data; 2990 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2991 PetscInt ierr,i,vertex_size,N; 2992 PetscViewer viewer=pcbddc->dbg_viewer; 2993 2994 PetscFunctionBegin; 2995 /* Reset previously computed graph */ 2996 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2997 /* Init local Graph struct */ 2998 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2999 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3000 3001 /* Check validity of the csr graph passed in by the user */ 3002 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3003 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3004 } 3005 3006 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3007 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3008 PetscInt *xadj,*adjncy; 3009 PetscInt nvtxs; 3010 PetscBool flg_row=PETSC_FALSE; 3011 3012 if (pcbddc->use_local_adj) { 3013 3014 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3015 if (flg_row) { 3016 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3017 pcbddc->computed_rowadj = PETSC_TRUE; 3018 } 3019 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3020 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3021 IS is_dummy; 3022 ISLocalToGlobalMapping l2gmap_dummy; 3023 PetscInt j,sum; 3024 PetscInt *cxadj,*cadjncy; 3025 const PetscInt *idxs; 3026 PCBDDCGraph graph; 3027 PetscBT is_on_boundary; 3028 3029 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3030 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3031 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3032 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3033 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3034 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3035 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3036 if (flg_row) { 3037 graph->xadj = xadj; 3038 graph->adjncy = adjncy; 3039 } 3040 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3041 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3042 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3043 3044 if (pcbddc->dbg_flag) { 3045 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3046 for (i=0;i<graph->ncc;i++) { 3047 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3048 } 3049 } 3050 3051 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3052 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3053 for (i=0;i<pcis->n_B;i++) { 3054 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3055 } 3056 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3057 3058 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3059 sum = 0; 3060 for (i=0;i<graph->ncc;i++) { 3061 PetscInt sizecc = 0; 3062 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3063 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3064 sizecc++; 3065 } 3066 } 3067 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3068 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3069 cxadj[graph->queue[j]] = sizecc; 3070 } 3071 } 3072 sum += sizecc*sizecc; 3073 } 3074 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3075 sum = 0; 3076 for (i=0;i<pcis->n;i++) { 3077 PetscInt temp = cxadj[i]; 3078 cxadj[i] = sum; 3079 sum += temp; 3080 } 3081 cxadj[pcis->n] = sum; 3082 for (i=0;i<graph->ncc;i++) { 3083 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3084 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3085 PetscInt k,sizecc = 0; 3086 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3087 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3088 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3089 sizecc++; 3090 } 3091 } 3092 } 3093 } 3094 } 3095 if (sum) { 3096 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3097 } else { 3098 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3099 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3100 } 3101 graph->xadj = 0; 3102 graph->adjncy = 0; 3103 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3104 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3105 } 3106 } 3107 if (pcbddc->dbg_flag) { 3108 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3109 } 3110 3111 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3112 vertex_size = 1; 3113 if (pcbddc->user_provided_isfordofs) { 3114 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3115 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3116 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3117 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3118 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3119 } 3120 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3121 pcbddc->n_ISForDofs = 0; 3122 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3123 } 3124 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3125 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3126 } else { 3127 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3128 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3129 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3130 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3131 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3132 } 3133 } 3134 } 3135 3136 /* Setup of Graph */ 3137 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3138 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3139 } 3140 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3141 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3142 } 3143 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3144 3145 /* Graph's connected components analysis */ 3146 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3147 3148 /* print some info to stdout */ 3149 if (pcbddc->dbg_flag) { 3150 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3151 } 3152 3153 /* mark topography has done */ 3154 pcbddc->recompute_topography = PETSC_FALSE; 3155 PetscFunctionReturn(0); 3156 } 3157 3158 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "PCBDDCSubsetNumbering" 3161 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3162 { 3163 PetscSF sf; 3164 PetscLayout map; 3165 const PetscInt *idxs; 3166 PetscInt *leaf_data,*root_data,*gidxs; 3167 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3168 PetscInt n_n,nlocals,start,first_index; 3169 PetscMPIInt commsize; 3170 PetscBool first_found; 3171 PetscErrorCode ierr; 3172 3173 PetscFunctionBegin; 3174 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3175 if (subset_mult) { 3176 PetscCheckSameComm(subset,1,subset_mult,2); 3177 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3178 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3179 } 3180 /* create workspace layout for computing global indices of subset */ 3181 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3182 lbounds[0] = lbounds[1] = 0; 3183 for (i=0;i<n;i++) { 3184 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3185 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3186 } 3187 lbounds[0] = -lbounds[0]; 3188 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3189 gbounds[0] = -gbounds[0]; 3190 N = gbounds[1] - gbounds[0] + 1; 3191 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3192 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3193 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3194 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3195 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3196 3197 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3198 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3199 if (subset_mult) { 3200 const PetscInt* idxs_mult; 3201 3202 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3203 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3204 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3205 } else { 3206 for (i=0;i<n;i++) leaf_data[i] = 1; 3207 } 3208 /* local size of new subset */ 3209 n_n = 0; 3210 for (i=0;i<n;i++) n_n += leaf_data[i]; 3211 3212 /* global indexes in layout */ 3213 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3214 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3215 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3216 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3217 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3218 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3219 3220 /* reduce from leaves to roots */ 3221 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3222 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3223 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3224 3225 /* count indexes in local part of layout */ 3226 nlocals = 0; 3227 first_index = -1; 3228 first_found = PETSC_FALSE; 3229 for (i=0;i<Nl;i++) { 3230 if (!first_found && root_data[i]) { 3231 first_found = PETSC_TRUE; 3232 first_index = i; 3233 } 3234 nlocals += root_data[i]; 3235 } 3236 3237 /* cumulative of number of indexes and size of subset without holes */ 3238 #if defined(PETSC_HAVE_MPI_EXSCAN) 3239 start = 0; 3240 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3241 #else 3242 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3243 start = start-nlocals; 3244 #endif 3245 3246 if (N_n) { /* compute total size of new subset if requested */ 3247 *N_n = start + nlocals; 3248 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3249 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3250 } 3251 3252 /* adapt root data with cumulative */ 3253 if (first_found) { 3254 PetscInt old_index; 3255 3256 root_data[first_index] += start; 3257 old_index = first_index; 3258 for (i=first_index+1;i<Nl;i++) { 3259 if (root_data[i]) { 3260 root_data[i] += root_data[old_index]; 3261 old_index = i; 3262 } 3263 } 3264 } 3265 3266 /* from roots to leaves */ 3267 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3268 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3269 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3270 3271 /* create new IS with global indexes without holes */ 3272 if (subset_mult) { 3273 const PetscInt* idxs_mult; 3274 PetscInt cum; 3275 3276 cum = 0; 3277 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3278 for (i=0;i<n;i++) { 3279 PetscInt j; 3280 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3281 } 3282 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3283 } else { 3284 for (i=0;i<n;i++) { 3285 gidxs[i] = leaf_data[i]-1; 3286 } 3287 } 3288 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3289 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3290 PetscFunctionReturn(0); 3291 } 3292 3293 #undef __FUNCT__ 3294 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3295 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3296 { 3297 PetscInt i,j; 3298 PetscScalar *alphas; 3299 PetscErrorCode ierr; 3300 3301 PetscFunctionBegin; 3302 /* this implements stabilized Gram-Schmidt */ 3303 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3304 for (i=0;i<n;i++) { 3305 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3306 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3307 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3308 } 3309 ierr = PetscFree(alphas);CHKERRQ(ierr); 3310 PetscFunctionReturn(0); 3311 } 3312 3313 #undef __FUNCT__ 3314 #define __FUNCT__ "MatISGetSubassemblingPattern" 3315 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3316 { 3317 IS ranks_send_to; 3318 PetscInt n_neighs,*neighs,*n_shared,**shared; 3319 PetscMPIInt size,rank,color; 3320 PetscInt *xadj,*adjncy; 3321 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3322 PetscInt i,local_size,threshold=0; 3323 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3324 PetscSubcomm subcomm; 3325 PetscErrorCode ierr; 3326 3327 PetscFunctionBegin; 3328 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3329 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3330 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3331 3332 /* Get info on mapping */ 3333 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3334 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3335 3336 /* build local CSR graph of subdomains' connectivity */ 3337 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3338 xadj[0] = 0; 3339 xadj[1] = PetscMax(n_neighs-1,0); 3340 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3341 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3342 3343 if (threshold) { 3344 PetscInt xadj_count = 0; 3345 for (i=1;i<n_neighs;i++) { 3346 if (n_shared[i] > threshold) { 3347 adjncy[xadj_count] = neighs[i]; 3348 adjncy_wgt[xadj_count] = n_shared[i]; 3349 xadj_count++; 3350 } 3351 } 3352 xadj[1] = xadj_count; 3353 } else { 3354 if (xadj[1]) { 3355 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3356 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3357 } 3358 } 3359 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3360 if (use_square) { 3361 for (i=0;i<xadj[1];i++) { 3362 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3363 } 3364 } 3365 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3366 3367 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3368 3369 /* 3370 Restrict work on active processes only. 3371 */ 3372 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3373 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3374 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3375 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3376 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3377 if (color) { 3378 ierr = PetscFree(xadj);CHKERRQ(ierr); 3379 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3380 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3381 } else { 3382 Mat subdomain_adj; 3383 IS new_ranks,new_ranks_contig; 3384 MatPartitioning partitioner; 3385 PetscInt prank,rstart=0,rend=0; 3386 PetscInt *is_indices,*oldranks; 3387 PetscBool aggregate; 3388 3389 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3390 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3391 prank = rank; 3392 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3393 /* 3394 for (i=0;i<size;i++) { 3395 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3396 } 3397 */ 3398 for (i=0;i<xadj[1];i++) { 3399 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3400 } 3401 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3402 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3403 if (aggregate) { 3404 PetscInt lrows,row,ncols,*cols; 3405 PetscMPIInt nrank; 3406 PetscScalar *vals; 3407 3408 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3409 lrows = 0; 3410 if (nrank<redprocs) { 3411 lrows = size/redprocs; 3412 if (nrank<size%redprocs) lrows++; 3413 } 3414 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3415 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3416 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3417 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3418 row = nrank; 3419 ncols = xadj[1]-xadj[0]; 3420 cols = adjncy; 3421 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3422 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3423 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3424 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3425 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3426 ierr = PetscFree(xadj);CHKERRQ(ierr); 3427 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3428 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3429 ierr = PetscFree(vals);CHKERRQ(ierr); 3430 } else { 3431 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3432 } 3433 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3434 3435 /* Partition */ 3436 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3437 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3438 if (use_vwgt) { 3439 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3440 v_wgt[0] = local_size; 3441 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3442 } 3443 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3444 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3445 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3446 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3447 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3448 3449 /* renumber new_ranks to avoid "holes" in new set of processors */ 3450 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3451 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3452 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3453 if (!redprocs) { 3454 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3455 } else { 3456 PetscInt idxs[1]; 3457 PetscMPIInt tag; 3458 MPI_Request *reqs; 3459 3460 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3461 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3462 for (i=rstart;i<rend;i++) { 3463 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3464 } 3465 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3466 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3467 ierr = PetscFree(reqs);CHKERRQ(ierr); 3468 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3469 } 3470 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3471 /* clean up */ 3472 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3473 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3474 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3475 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3476 } 3477 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3478 3479 /* assemble parallel IS for sends */ 3480 i = 1; 3481 if (color) i=0; 3482 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3483 /* get back IS */ 3484 *is_sends = ranks_send_to; 3485 PetscFunctionReturn(0); 3486 } 3487 3488 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3489 3490 #undef __FUNCT__ 3491 #define __FUNCT__ "MatISSubassemble" 3492 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3493 { 3494 Mat local_mat; 3495 IS is_sends_internal; 3496 PetscInt rows,cols,new_local_rows; 3497 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3498 PetscBool ismatis,isdense,newisdense,destroy_mat; 3499 ISLocalToGlobalMapping l2gmap; 3500 PetscInt* l2gmap_indices; 3501 const PetscInt* is_indices; 3502 MatType new_local_type; 3503 /* buffers */ 3504 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3505 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3506 PetscInt *recv_buffer_idxs_local; 3507 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3508 /* MPI */ 3509 MPI_Comm comm,comm_n; 3510 PetscSubcomm subcomm; 3511 PetscMPIInt n_sends,n_recvs,commsize; 3512 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3513 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3514 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3515 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3516 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3517 PetscErrorCode ierr; 3518 3519 PetscFunctionBegin; 3520 /* TODO: add missing checks */ 3521 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3522 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3523 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3524 PetscValidLogicalCollectiveInt(mat,nis,7); 3525 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3526 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3527 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3528 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3529 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3530 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3531 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3532 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3533 PetscInt mrows,mcols,mnrows,mncols; 3534 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3535 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3536 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3537 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3538 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3539 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3540 } 3541 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3542 PetscValidLogicalCollectiveInt(mat,bs,0); 3543 /* prepare IS for sending if not provided */ 3544 if (!is_sends) { 3545 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3546 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3547 } else { 3548 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3549 is_sends_internal = is_sends; 3550 } 3551 3552 /* get comm */ 3553 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3554 3555 /* compute number of sends */ 3556 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3557 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3558 3559 /* compute number of receives */ 3560 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3561 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3562 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3563 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3564 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3565 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3566 ierr = PetscFree(iflags);CHKERRQ(ierr); 3567 3568 /* restrict comm if requested */ 3569 subcomm = 0; 3570 destroy_mat = PETSC_FALSE; 3571 if (restrict_comm) { 3572 PetscMPIInt color,subcommsize; 3573 3574 color = 0; 3575 if (restrict_full) { 3576 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3577 } else { 3578 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3579 } 3580 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3581 subcommsize = commsize - subcommsize; 3582 /* check if reuse has been requested */ 3583 if (reuse == MAT_REUSE_MATRIX) { 3584 if (*mat_n) { 3585 PetscMPIInt subcommsize2; 3586 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3587 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3588 comm_n = PetscObjectComm((PetscObject)*mat_n); 3589 } else { 3590 comm_n = PETSC_COMM_SELF; 3591 } 3592 } else { /* MAT_INITIAL_MATRIX */ 3593 PetscMPIInt rank; 3594 3595 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3596 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3597 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3598 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3599 comm_n = PetscSubcommChild(subcomm); 3600 } 3601 /* flag to destroy *mat_n if not significative */ 3602 if (color) destroy_mat = PETSC_TRUE; 3603 } else { 3604 comm_n = comm; 3605 } 3606 3607 /* prepare send/receive buffers */ 3608 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3609 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3610 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3611 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3612 if (nis) { 3613 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3614 } 3615 3616 /* Get data from local matrices */ 3617 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3618 /* TODO: See below some guidelines on how to prepare the local buffers */ 3619 /* 3620 send_buffer_vals should contain the raw values of the local matrix 3621 send_buffer_idxs should contain: 3622 - MatType_PRIVATE type 3623 - PetscInt size_of_l2gmap 3624 - PetscInt global_row_indices[size_of_l2gmap] 3625 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3626 */ 3627 else { 3628 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3629 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3630 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3631 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3632 send_buffer_idxs[1] = i; 3633 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3634 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3635 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3636 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3637 for (i=0;i<n_sends;i++) { 3638 ilengths_vals[is_indices[i]] = len*len; 3639 ilengths_idxs[is_indices[i]] = len+2; 3640 } 3641 } 3642 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3643 /* additional is (if any) */ 3644 if (nis) { 3645 PetscMPIInt psum; 3646 PetscInt j; 3647 for (j=0,psum=0;j<nis;j++) { 3648 PetscInt plen; 3649 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3650 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3651 psum += len+1; /* indices + lenght */ 3652 } 3653 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3654 for (j=0,psum=0;j<nis;j++) { 3655 PetscInt plen; 3656 const PetscInt *is_array_idxs; 3657 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3658 send_buffer_idxs_is[psum] = plen; 3659 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3660 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3661 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3662 psum += plen+1; /* indices + lenght */ 3663 } 3664 for (i=0;i<n_sends;i++) { 3665 ilengths_idxs_is[is_indices[i]] = psum; 3666 } 3667 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3668 } 3669 3670 buf_size_idxs = 0; 3671 buf_size_vals = 0; 3672 buf_size_idxs_is = 0; 3673 for (i=0;i<n_recvs;i++) { 3674 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3675 buf_size_vals += (PetscInt)olengths_vals[i]; 3676 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3677 } 3678 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3679 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3680 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3681 3682 /* get new tags for clean communications */ 3683 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3684 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3685 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3686 3687 /* allocate for requests */ 3688 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3689 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3690 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3691 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3692 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3693 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3694 3695 /* communications */ 3696 ptr_idxs = recv_buffer_idxs; 3697 ptr_vals = recv_buffer_vals; 3698 ptr_idxs_is = recv_buffer_idxs_is; 3699 for (i=0;i<n_recvs;i++) { 3700 source_dest = onodes[i]; 3701 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3702 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3703 ptr_idxs += olengths_idxs[i]; 3704 ptr_vals += olengths_vals[i]; 3705 if (nis) { 3706 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3707 ptr_idxs_is += olengths_idxs_is[i]; 3708 } 3709 } 3710 for (i=0;i<n_sends;i++) { 3711 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3712 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3713 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3714 if (nis) { 3715 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3716 } 3717 } 3718 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3719 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3720 3721 /* assemble new l2g map */ 3722 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3723 ptr_idxs = recv_buffer_idxs; 3724 new_local_rows = 0; 3725 for (i=0;i<n_recvs;i++) { 3726 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3727 ptr_idxs += olengths_idxs[i]; 3728 } 3729 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3730 ptr_idxs = recv_buffer_idxs; 3731 new_local_rows = 0; 3732 for (i=0;i<n_recvs;i++) { 3733 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3734 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3735 ptr_idxs += olengths_idxs[i]; 3736 } 3737 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3738 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3739 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3740 3741 /* infer new local matrix type from received local matrices type */ 3742 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3743 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3744 if (n_recvs) { 3745 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3746 ptr_idxs = recv_buffer_idxs; 3747 for (i=0;i<n_recvs;i++) { 3748 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3749 new_local_type_private = MATAIJ_PRIVATE; 3750 break; 3751 } 3752 ptr_idxs += olengths_idxs[i]; 3753 } 3754 switch (new_local_type_private) { 3755 case MATDENSE_PRIVATE: 3756 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3757 new_local_type = MATSEQAIJ; 3758 bs = 1; 3759 } else { /* if I receive only 1 dense matrix */ 3760 new_local_type = MATSEQDENSE; 3761 bs = 1; 3762 } 3763 break; 3764 case MATAIJ_PRIVATE: 3765 new_local_type = MATSEQAIJ; 3766 bs = 1; 3767 break; 3768 case MATBAIJ_PRIVATE: 3769 new_local_type = MATSEQBAIJ; 3770 break; 3771 case MATSBAIJ_PRIVATE: 3772 new_local_type = MATSEQSBAIJ; 3773 break; 3774 default: 3775 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3776 break; 3777 } 3778 } else { /* by default, new_local_type is seqdense */ 3779 new_local_type = MATSEQDENSE; 3780 bs = 1; 3781 } 3782 3783 /* create MATIS object if needed */ 3784 if (reuse == MAT_INITIAL_MATRIX) { 3785 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3786 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3787 } else { 3788 /* it also destroys the local matrices */ 3789 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3790 } 3791 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3792 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3793 3794 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3795 3796 /* Global to local map of received indices */ 3797 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3798 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3799 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3800 3801 /* restore attributes -> type of incoming data and its size */ 3802 buf_size_idxs = 0; 3803 for (i=0;i<n_recvs;i++) { 3804 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3805 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3806 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3807 } 3808 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3809 3810 /* set preallocation */ 3811 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3812 if (!newisdense) { 3813 PetscInt *new_local_nnz=0; 3814 3815 ptr_vals = recv_buffer_vals; 3816 ptr_idxs = recv_buffer_idxs_local; 3817 if (n_recvs) { 3818 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3819 } 3820 for (i=0;i<n_recvs;i++) { 3821 PetscInt j; 3822 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3823 for (j=0;j<*(ptr_idxs+1);j++) { 3824 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3825 } 3826 } else { 3827 /* TODO */ 3828 } 3829 ptr_idxs += olengths_idxs[i]; 3830 } 3831 if (new_local_nnz) { 3832 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3833 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3834 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3835 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3836 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3837 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3838 } else { 3839 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3840 } 3841 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3842 } else { 3843 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3844 } 3845 3846 /* set values */ 3847 ptr_vals = recv_buffer_vals; 3848 ptr_idxs = recv_buffer_idxs_local; 3849 for (i=0;i<n_recvs;i++) { 3850 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3851 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3852 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3853 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3854 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3855 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3856 } else { 3857 /* TODO */ 3858 } 3859 ptr_idxs += olengths_idxs[i]; 3860 ptr_vals += olengths_vals[i]; 3861 } 3862 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3863 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3864 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3867 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3868 3869 #if 0 3870 if (!restrict_comm) { /* check */ 3871 Vec lvec,rvec; 3872 PetscReal infty_error; 3873 3874 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3875 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3876 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3877 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3878 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3879 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3880 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3881 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3882 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3883 } 3884 #endif 3885 3886 /* assemble new additional is (if any) */ 3887 if (nis) { 3888 PetscInt **temp_idxs,*count_is,j,psum; 3889 3890 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3891 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3892 ptr_idxs = recv_buffer_idxs_is; 3893 psum = 0; 3894 for (i=0;i<n_recvs;i++) { 3895 for (j=0;j<nis;j++) { 3896 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3897 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3898 psum += plen; 3899 ptr_idxs += plen+1; /* shift pointer to received data */ 3900 } 3901 } 3902 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3903 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3904 for (i=1;i<nis;i++) { 3905 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3906 } 3907 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3908 ptr_idxs = recv_buffer_idxs_is; 3909 for (i=0;i<n_recvs;i++) { 3910 for (j=0;j<nis;j++) { 3911 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3912 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3913 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3914 ptr_idxs += plen+1; /* shift pointer to received data */ 3915 } 3916 } 3917 for (i=0;i<nis;i++) { 3918 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3919 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3920 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3921 } 3922 ierr = PetscFree(count_is);CHKERRQ(ierr); 3923 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3924 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3925 } 3926 /* free workspace */ 3927 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3928 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3929 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3930 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3931 if (isdense) { 3932 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3933 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3934 } else { 3935 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3936 } 3937 if (nis) { 3938 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3939 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3940 } 3941 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3942 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3943 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3944 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3945 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3946 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3947 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3948 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3949 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3950 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3951 ierr = PetscFree(onodes);CHKERRQ(ierr); 3952 if (nis) { 3953 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3954 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3955 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3956 } 3957 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3958 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3959 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3960 for (i=0;i<nis;i++) { 3961 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3962 } 3963 *mat_n = NULL; 3964 } 3965 PetscFunctionReturn(0); 3966 } 3967 3968 /* temporary hack into ksp private data structure */ 3969 #include <petsc/private/kspimpl.h> 3970 3971 #undef __FUNCT__ 3972 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3973 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3974 { 3975 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3976 PC_IS *pcis = (PC_IS*)pc->data; 3977 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3978 MatNullSpace CoarseNullSpace=NULL; 3979 ISLocalToGlobalMapping coarse_islg; 3980 IS coarse_is,*isarray; 3981 PetscInt i,im_active=-1,active_procs=-1; 3982 PetscInt nis,nisdofs,nisneu; 3983 PC pc_temp; 3984 PCType coarse_pc_type; 3985 KSPType coarse_ksp_type; 3986 PetscBool multilevel_requested,multilevel_allowed; 3987 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3988 Mat t_coarse_mat_is; 3989 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3990 PetscMPIInt all_procs; 3991 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3992 PetscBool compute_vecs = PETSC_FALSE; 3993 PetscScalar *array; 3994 PetscErrorCode ierr; 3995 3996 PetscFunctionBegin; 3997 /* Assign global numbering to coarse dofs */ 3998 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3999 PetscInt ocoarse_size; 4000 compute_vecs = PETSC_TRUE; 4001 ocoarse_size = pcbddc->coarse_size; 4002 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4003 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4004 /* see if we can avoid some work */ 4005 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4006 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4007 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4008 PC pc; 4009 PetscBool isbddc; 4010 4011 /* temporary workaround since PCBDDC does not have a reset method so far */ 4012 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4013 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4014 if (isbddc) { 4015 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4016 } 4017 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4018 coarse_reuse = PETSC_FALSE; 4019 } else { /* we can safely reuse already computed coarse matrix */ 4020 coarse_reuse = PETSC_TRUE; 4021 } 4022 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4023 coarse_reuse = PETSC_FALSE; 4024 } 4025 /* reset any subassembling information */ 4026 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4027 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4028 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4029 coarse_reuse = PETSC_TRUE; 4030 } 4031 4032 /* count "active" (i.e. with positive local size) and "void" processes */ 4033 im_active = !!(pcis->n); 4034 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4035 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4036 void_procs = all_procs-active_procs; 4037 csin_type_simple = PETSC_TRUE; 4038 redist = PETSC_FALSE; 4039 if (pcbddc->current_level && void_procs) { 4040 csin_ml = PETSC_TRUE; 4041 ncoarse_ml = void_procs; 4042 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4043 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4044 csin_ds = PETSC_TRUE; 4045 ncoarse_ds = pcbddc->redistribute_coarse; 4046 redist = PETSC_TRUE; 4047 } else { 4048 csin_ds = PETSC_TRUE; 4049 ncoarse_ds = active_procs; 4050 redist = PETSC_TRUE; 4051 } 4052 } else { 4053 csin_ml = PETSC_FALSE; 4054 ncoarse_ml = all_procs; 4055 if (void_procs) { 4056 csin_ds = PETSC_TRUE; 4057 ncoarse_ds = void_procs; 4058 csin_type_simple = PETSC_FALSE; 4059 } else { 4060 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4061 csin_ds = PETSC_TRUE; 4062 ncoarse_ds = pcbddc->redistribute_coarse; 4063 redist = PETSC_TRUE; 4064 } else { 4065 csin_ds = PETSC_FALSE; 4066 ncoarse_ds = all_procs; 4067 } 4068 } 4069 } 4070 4071 /* 4072 test if we can go multilevel: three conditions must be satisfied: 4073 - we have not exceeded the number of levels requested 4074 - we can actually subassemble the active processes 4075 - we can find a suitable number of MPI processes where we can place the subassembled problem 4076 */ 4077 multilevel_allowed = PETSC_FALSE; 4078 multilevel_requested = PETSC_FALSE; 4079 if (pcbddc->current_level < pcbddc->max_levels) { 4080 multilevel_requested = PETSC_TRUE; 4081 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4082 multilevel_allowed = PETSC_FALSE; 4083 } else { 4084 multilevel_allowed = PETSC_TRUE; 4085 } 4086 } 4087 /* determine number of process partecipating to coarse solver */ 4088 if (multilevel_allowed) { 4089 ncoarse = ncoarse_ml; 4090 csin = csin_ml; 4091 redist = PETSC_FALSE; 4092 } else { 4093 ncoarse = ncoarse_ds; 4094 csin = csin_ds; 4095 } 4096 4097 /* creates temporary l2gmap and IS for coarse indexes */ 4098 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4099 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4100 4101 /* creates temporary MATIS object for coarse matrix */ 4102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4103 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4104 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4105 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4106 #if 0 4107 { 4108 PetscViewer viewer; 4109 char filename[256]; 4110 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4111 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4112 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4113 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4114 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4115 } 4116 #endif 4117 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4118 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4119 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4120 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4121 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4122 4123 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4124 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4125 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4126 const PetscInt *idxs; 4127 ISLocalToGlobalMapping tmap; 4128 4129 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4130 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4131 /* allocate space for temporary storage */ 4132 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4133 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4134 /* allocate for IS array */ 4135 nisdofs = pcbddc->n_ISForDofsLocal; 4136 nisneu = !!pcbddc->NeumannBoundariesLocal; 4137 nis = nisdofs + nisneu; 4138 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4139 /* dofs splitting */ 4140 for (i=0;i<nisdofs;i++) { 4141 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4142 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4143 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4144 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4145 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4146 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4147 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4148 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4149 } 4150 /* neumann boundaries */ 4151 if (pcbddc->NeumannBoundariesLocal) { 4152 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4153 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4154 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4155 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4156 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4157 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4158 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4159 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4160 } 4161 /* free memory */ 4162 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4163 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4164 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4165 } else { 4166 nis = 0; 4167 nisdofs = 0; 4168 nisneu = 0; 4169 isarray = NULL; 4170 } 4171 /* destroy no longer needed map */ 4172 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4173 4174 /* restrict on coarse candidates (if needed) */ 4175 coarse_mat_is = NULL; 4176 if (csin) { 4177 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4178 if (redist) { 4179 PetscMPIInt rank; 4180 PetscInt spc,n_spc_p1,dest[1],destsize; 4181 4182 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4183 spc = active_procs/ncoarse; 4184 n_spc_p1 = active_procs%ncoarse; 4185 if (im_active) { 4186 destsize = 1; 4187 if (rank > n_spc_p1*(spc+1)-1) { 4188 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4189 } else { 4190 dest[0] = rank/(spc+1); 4191 } 4192 } else { 4193 destsize = 0; 4194 } 4195 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4196 } else if (csin_type_simple) { 4197 PetscMPIInt rank; 4198 PetscInt issize,isidx; 4199 4200 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4201 if (im_active) { 4202 issize = 1; 4203 isidx = (PetscInt)rank; 4204 } else { 4205 issize = 0; 4206 isidx = -1; 4207 } 4208 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4209 } else { /* get a suitable subassembling pattern from MATIS code */ 4210 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4211 } 4212 4213 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4214 if (!redist || ncoarse <= void_procs) { 4215 PetscInt ncoarse_cand,tissize,*nisindices; 4216 PetscInt *coarse_candidates; 4217 const PetscInt* tisindices; 4218 4219 /* get coarse candidates' ranks in pc communicator */ 4220 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4221 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4222 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4223 if (!coarse_candidates[i]) { 4224 coarse_candidates[ncoarse_cand++]=i; 4225 } 4226 } 4227 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4228 4229 4230 if (pcbddc->dbg_flag) { 4231 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4232 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4233 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4234 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4235 for (i=0;i<ncoarse_cand;i++) { 4236 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4237 } 4238 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4239 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4240 } 4241 /* shift the pattern on coarse candidates */ 4242 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4243 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4244 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4245 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4246 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4247 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4248 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4249 } 4250 if (pcbddc->dbg_flag) { 4251 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4252 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4253 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4254 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4255 } 4256 } 4257 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4258 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4259 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4260 } else { /* this is the last level, so use just receiving processes in subcomm */ 4261 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4262 } 4263 } else { 4264 if (pcbddc->dbg_flag) { 4265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4267 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4268 } 4269 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4270 coarse_mat_is = t_coarse_mat_is; 4271 } 4272 4273 /* create local to global scatters for coarse problem */ 4274 if (compute_vecs) { 4275 PetscInt lrows; 4276 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4277 if (coarse_mat_is) { 4278 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4279 } else { 4280 lrows = 0; 4281 } 4282 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4283 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4284 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4285 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4286 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4287 } 4288 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4289 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4290 4291 /* set defaults for coarse KSP and PC */ 4292 if (multilevel_allowed) { 4293 coarse_ksp_type = KSPRICHARDSON; 4294 coarse_pc_type = PCBDDC; 4295 } else { 4296 coarse_ksp_type = KSPPREONLY; 4297 coarse_pc_type = PCREDUNDANT; 4298 } 4299 4300 /* print some info if requested */ 4301 if (pcbddc->dbg_flag) { 4302 if (!multilevel_allowed) { 4303 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4304 if (multilevel_requested) { 4305 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4306 } else if (pcbddc->max_levels) { 4307 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4308 } 4309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4310 } 4311 } 4312 4313 /* create the coarse KSP object only once with defaults */ 4314 if (coarse_mat_is) { 4315 MatReuse coarse_mat_reuse; 4316 PetscViewer dbg_viewer = NULL; 4317 if (pcbddc->dbg_flag) { 4318 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4319 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4320 } 4321 if (!pcbddc->coarse_ksp) { 4322 char prefix[256],str_level[16]; 4323 size_t len; 4324 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4325 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4326 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4327 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4328 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4329 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4330 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4331 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4332 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4333 /* prefix */ 4334 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4335 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4336 if (!pcbddc->current_level) { 4337 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4338 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4339 } else { 4340 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4341 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4342 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4343 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4344 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4345 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4346 } 4347 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4348 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4349 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4350 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4351 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4352 /* allow user customization */ 4353 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4354 } 4355 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4356 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4357 if (nisdofs) { 4358 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4359 for (i=0;i<nisdofs;i++) { 4360 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4361 } 4362 } 4363 if (nisneu) { 4364 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4365 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4366 } 4367 4368 /* get some info after set from options */ 4369 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4370 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4371 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4372 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4373 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4374 isbddc = PETSC_FALSE; 4375 } 4376 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4377 if (isredundant) { 4378 KSP inner_ksp; 4379 PC inner_pc; 4380 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4381 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4382 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4383 } 4384 4385 /* assemble coarse matrix */ 4386 if (coarse_reuse) { 4387 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4388 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4389 coarse_mat_reuse = MAT_REUSE_MATRIX; 4390 } else { 4391 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4392 } 4393 if (isbddc || isnn) { 4394 if (pcbddc->coarsening_ratio > 1) { 4395 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4396 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4397 if (pcbddc->dbg_flag) { 4398 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4399 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4400 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4401 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4402 } 4403 } 4404 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4405 } else { 4406 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4407 coarse_mat = coarse_mat_is; 4408 } 4409 } else { 4410 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4411 } 4412 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4413 4414 /* propagate symmetry info of coarse matrix */ 4415 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4416 if (pc->pmat->symmetric_set) { 4417 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4418 } 4419 if (pc->pmat->hermitian_set) { 4420 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4421 } 4422 if (pc->pmat->spd_set) { 4423 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4424 } 4425 /* set operators */ 4426 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4427 if (pcbddc->dbg_flag) { 4428 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4429 } 4430 } else { /* processes non partecipating to coarse solver (if any) */ 4431 coarse_mat = 0; 4432 } 4433 ierr = PetscFree(isarray);CHKERRQ(ierr); 4434 #if 0 4435 { 4436 PetscViewer viewer; 4437 char filename[256]; 4438 sprintf(filename,"coarse_mat.m"); 4439 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4440 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4441 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4442 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4443 } 4444 #endif 4445 4446 /* Compute coarse null space (special handling by BDDC only) */ 4447 #if 0 4448 if (pcbddc->NullSpace) { 4449 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4450 } 4451 #endif 4452 4453 if (pcbddc->coarse_ksp) { 4454 Vec crhs,csol; 4455 PetscBool ispreonly; 4456 4457 if (CoarseNullSpace) { 4458 if (isbddc) { 4459 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4460 } else { 4461 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4462 } 4463 } 4464 /* setup coarse ksp */ 4465 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4466 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4467 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4468 /* hack */ 4469 if (!csol) { 4470 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4471 } 4472 if (!crhs) { 4473 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4474 } 4475 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4476 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4477 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4478 KSP check_ksp; 4479 KSPType check_ksp_type; 4480 PC check_pc; 4481 Vec check_vec,coarse_vec; 4482 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4483 PetscInt its; 4484 PetscBool compute_eigs; 4485 PetscReal *eigs_r,*eigs_c; 4486 PetscInt neigs; 4487 const char *prefix; 4488 4489 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4490 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4491 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4492 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4493 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4494 if (ispreonly) { 4495 check_ksp_type = KSPPREONLY; 4496 compute_eigs = PETSC_FALSE; 4497 } else { 4498 check_ksp_type = KSPGMRES; 4499 compute_eigs = PETSC_TRUE; 4500 } 4501 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4502 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4503 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4504 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4505 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4506 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4507 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4508 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4509 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4510 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4511 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4512 /* create random vec */ 4513 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4514 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4515 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4516 if (CoarseNullSpace) { 4517 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4518 } 4519 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4520 /* solve coarse problem */ 4521 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4522 if (CoarseNullSpace) { 4523 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4524 } 4525 /* set eigenvalue estimation if preonly has not been requested */ 4526 if (compute_eigs) { 4527 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4528 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4529 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4530 lambda_max = eigs_r[neigs-1]; 4531 lambda_min = eigs_r[0]; 4532 if (pcbddc->use_coarse_estimates) { 4533 if (lambda_max>lambda_min) { 4534 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4535 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4536 } 4537 } 4538 } 4539 4540 /* check coarse problem residual error */ 4541 if (pcbddc->dbg_flag) { 4542 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4543 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4544 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4545 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4546 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4547 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4548 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4549 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4550 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4551 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4552 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4553 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4554 if (compute_eigs) { 4555 PetscReal lambda_max_s,lambda_min_s; 4556 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4557 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4558 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4559 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4560 for (i=0;i<neigs;i++) { 4561 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4562 } 4563 } 4564 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4565 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4566 } 4567 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4568 if (compute_eigs) { 4569 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4570 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4571 } 4572 } 4573 } 4574 /* print additional info */ 4575 if (pcbddc->dbg_flag) { 4576 /* waits until all processes reaches this point */ 4577 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4578 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4579 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4580 } 4581 4582 /* free memory */ 4583 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4584 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4585 PetscFunctionReturn(0); 4586 } 4587 4588 #undef __FUNCT__ 4589 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4590 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4591 { 4592 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4593 PC_IS* pcis = (PC_IS*)pc->data; 4594 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4595 IS subset,subset_mult,subset_n; 4596 PetscInt local_size,coarse_size=0; 4597 PetscInt *local_primal_indices=NULL; 4598 const PetscInt *t_local_primal_indices; 4599 PetscErrorCode ierr; 4600 4601 PetscFunctionBegin; 4602 /* Compute global number of coarse dofs */ 4603 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4604 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4605 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4606 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4607 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4608 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4609 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4610 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4611 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4612 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 4613 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4614 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4615 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4616 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4617 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4618 4619 /* check numbering */ 4620 if (pcbddc->dbg_flag) { 4621 PetscScalar coarsesum,*array; 4622 PetscInt i; 4623 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4624 4625 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4626 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4627 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4628 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4629 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4630 for (i=0;i<pcbddc->local_primal_size;i++) { 4631 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4632 } 4633 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4634 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4635 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4636 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4637 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4638 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4639 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4640 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4641 for (i=0;i<pcis->n;i++) { 4642 if (array[i] == 1.0) { 4643 set_error = PETSC_TRUE; 4644 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4645 } 4646 } 4647 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4648 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4649 for (i=0;i<pcis->n;i++) { 4650 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4651 } 4652 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4653 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4654 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4655 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4656 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4658 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4659 PetscInt *gidxs; 4660 4661 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4662 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4663 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4664 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4665 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4666 for (i=0;i<pcbddc->local_primal_size;i++) { 4667 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4668 } 4669 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4670 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4671 } 4672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4673 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4674 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4675 } 4676 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4677 /* get back data */ 4678 *coarse_size_n = coarse_size; 4679 *local_primal_indices_n = local_primal_indices; 4680 PetscFunctionReturn(0); 4681 } 4682 4683 #undef __FUNCT__ 4684 #define __FUNCT__ "PCBDDCGlobalToLocal" 4685 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4686 { 4687 IS localis_t; 4688 PetscInt i,lsize,*idxs,n; 4689 PetscScalar *vals; 4690 PetscErrorCode ierr; 4691 4692 PetscFunctionBegin; 4693 /* get indices in local ordering exploiting local to global map */ 4694 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4695 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4696 for (i=0;i<lsize;i++) vals[i] = 1.0; 4697 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4698 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4699 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4700 if (idxs) { /* multilevel guard */ 4701 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4702 } 4703 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4704 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4705 ierr = PetscFree(vals);CHKERRQ(ierr); 4706 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4707 /* now compute set in local ordering */ 4708 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4709 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4710 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4711 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4712 for (i=0,lsize=0;i<n;i++) { 4713 if (PetscRealPart(vals[i]) > 0.5) { 4714 lsize++; 4715 } 4716 } 4717 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4718 for (i=0,lsize=0;i<n;i++) { 4719 if (PetscRealPart(vals[i]) > 0.5) { 4720 idxs[lsize++] = i; 4721 } 4722 } 4723 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4724 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4725 *localis = localis_t; 4726 PetscFunctionReturn(0); 4727 } 4728 4729 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4730 #undef __FUNCT__ 4731 #define __FUNCT__ "PCBDDCMatMult_Private" 4732 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4733 { 4734 PCBDDCChange_ctx change_ctx; 4735 PetscErrorCode ierr; 4736 4737 PetscFunctionBegin; 4738 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4739 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4740 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4741 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4742 PetscFunctionReturn(0); 4743 } 4744 4745 #undef __FUNCT__ 4746 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4747 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4748 { 4749 PCBDDCChange_ctx change_ctx; 4750 PetscErrorCode ierr; 4751 4752 PetscFunctionBegin; 4753 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4754 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4755 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4756 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4757 PetscFunctionReturn(0); 4758 } 4759 4760 #undef __FUNCT__ 4761 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4762 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4763 { 4764 PC_IS *pcis=(PC_IS*)pc->data; 4765 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4766 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4767 Mat S_j; 4768 PetscInt *used_xadj,*used_adjncy; 4769 PetscBool free_used_adj; 4770 PetscErrorCode ierr; 4771 4772 PetscFunctionBegin; 4773 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4774 free_used_adj = PETSC_FALSE; 4775 if (pcbddc->sub_schurs_layers == -1) { 4776 used_xadj = NULL; 4777 used_adjncy = NULL; 4778 } else { 4779 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4780 used_xadj = pcbddc->mat_graph->xadj; 4781 used_adjncy = pcbddc->mat_graph->adjncy; 4782 } else if (pcbddc->computed_rowadj) { 4783 used_xadj = pcbddc->mat_graph->xadj; 4784 used_adjncy = pcbddc->mat_graph->adjncy; 4785 } else { 4786 PetscBool flg_row=PETSC_FALSE; 4787 const PetscInt *xadj,*adjncy; 4788 PetscInt nvtxs; 4789 4790 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4791 if (flg_row) { 4792 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4793 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4794 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4795 free_used_adj = PETSC_TRUE; 4796 } else { 4797 pcbddc->sub_schurs_layers = -1; 4798 used_xadj = NULL; 4799 used_adjncy = NULL; 4800 } 4801 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4802 } 4803 } 4804 4805 /* setup sub_schurs data */ 4806 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4807 if (!sub_schurs->use_mumps) { 4808 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4809 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4810 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4811 } else { 4812 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4813 PetscBool isseqaij; 4814 if (!pcbddc->use_vertices && reuse_solvers) { 4815 PetscInt n_vertices; 4816 4817 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4818 reuse_solvers = (PetscBool)!n_vertices; 4819 } 4820 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4821 if (!isseqaij) { 4822 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4823 if (matis->A == pcbddc->local_mat) { 4824 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4825 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4826 } else { 4827 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4828 } 4829 } 4830 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4831 } 4832 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4833 4834 /* free adjacency */ 4835 if (free_used_adj) { 4836 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4837 } 4838 PetscFunctionReturn(0); 4839 } 4840 4841 #undef __FUNCT__ 4842 #define __FUNCT__ "PCBDDCInitSubSchurs" 4843 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4844 { 4845 PC_IS *pcis=(PC_IS*)pc->data; 4846 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4847 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4848 PCBDDCGraph graph; 4849 PetscErrorCode ierr; 4850 4851 PetscFunctionBegin; 4852 /* attach interface graph for determining subsets */ 4853 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4854 IS verticesIS,verticescomm; 4855 PetscInt vsize,*idxs; 4856 4857 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4858 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4859 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4860 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4861 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4862 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4863 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4864 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4865 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4866 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4867 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4868 /* 4869 if (pcbddc->dbg_flag) { 4870 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4871 } 4872 */ 4873 } else { 4874 graph = pcbddc->mat_graph; 4875 } 4876 4877 /* sub_schurs init */ 4878 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4879 4880 /* free graph struct */ 4881 if (pcbddc->sub_schurs_rebuild) { 4882 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4883 } 4884 PetscFunctionReturn(0); 4885 } 4886