1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 thresh = 1.0; 92 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 93 #if defined(PETSC_USE_COMPLEX) 94 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 95 #else 96 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 97 #endif 98 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 99 ierr = PetscFPTrapPop();CHKERRQ(ierr); 100 } else { 101 /* TODO */ 102 } 103 } else { 104 lwork = 0; 105 } 106 107 nv = 0; 108 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 109 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 110 } 111 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 112 if (allocated_S_St) { 113 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 114 } 115 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 116 #if defined(PETSC_USE_COMPLEX) 117 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 118 #endif 119 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 121 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 122 nv+cum,&pcbddc->adaptive_constraints_idxs, 123 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 124 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 125 126 maxneigs = 0; 127 cum = cum2 = cumarray = 0; 128 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 129 pcbddc->adaptive_constraints_data_ptr[0] = 0; 130 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 131 const PetscInt *idxs; 132 133 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 134 for (cum=0;cum<nv;cum++) { 135 pcbddc->adaptive_constraints_n[cum] = 1; 136 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 137 pcbddc->adaptive_constraints_data[cum] = 1.0; 138 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 139 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 140 } 141 cum2 = cum; 142 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 143 } 144 145 if (mss) { /* multilevel */ 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 147 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 148 } 149 150 for (i=0;i<sub_schurs->n_subs;i++) { 151 152 const PetscInt *idxs; 153 PetscReal infty = PETSC_MAX_REAL; 154 PetscInt j,subset_size,eigs_start = 0; 155 PetscBLASInt B_N; 156 PetscBool same_data = PETSC_FALSE; 157 158 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 159 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 160 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 161 if (sub_schurs->is_hermitian) { 162 PetscInt j,k; 163 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 164 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 for (j=0;j<subset_size;j++) { 168 for (k=j;k<subset_size;k++) { 169 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 170 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 171 } 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 183 /* see if we can save some work */ 184 if (sub_schurs->n_subs == 1) { 185 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 186 } 187 188 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 189 B_neigs = 0; 190 } else { 191 /* Threshold: this is an heuristic for edges */ 192 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 193 194 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 195 PetscBLASInt B_itype = 1; 196 PetscBLASInt B_IL, B_IU; 197 PetscReal eps = -1.0; /* dlamch? */ 198 PetscInt nmin_s; 199 200 /* ask for eigenvalues larger than thresh */ 201 if (pcbddc->dbg_flag) { 202 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 203 } 204 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 205 #if defined(PETSC_USE_COMPLEX) 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 207 #else 208 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 209 #endif 210 ierr = PetscFPTrapPop();CHKERRQ(ierr); 211 if (B_ierr) { 212 if (B_ierr < 0 ) { 213 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 214 } else if (B_ierr <= B_N) { 215 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 216 } else { 217 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 218 } 219 } 220 221 if (B_neigs > nmax) { 222 if (pcbddc->dbg_flag) { 223 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 224 } 225 eigs_start = B_neigs -nmax; 226 B_neigs = nmax; 227 } 228 229 nmin_s = PetscMin(nmin,B_N); 230 if (B_neigs < nmin_s) { 231 PetscBLASInt B_neigs2; 232 233 B_IU = B_N - B_neigs; 234 B_IL = B_N - nmin_s + 1; 235 if (pcbddc->dbg_flag) { 236 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 237 } 238 if (sub_schurs->is_hermitian) { 239 PetscInt j; 240 for (j=0;j<subset_size;j++) { 241 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 242 } 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 } else { 247 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 248 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 249 } 250 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 251 #if defined(PETSC_USE_COMPLEX) 252 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 253 #else 254 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 255 #endif 256 ierr = PetscFPTrapPop();CHKERRQ(ierr); 257 B_neigs += B_neigs2; 258 } 259 if (B_ierr) { 260 if (B_ierr < 0 ) { 261 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 262 } else if (B_ierr <= B_N) { 263 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 264 } else { 265 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 266 } 267 } 268 if (pcbddc->dbg_flag) { 269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 270 for (j=0;j<B_neigs;j++) { 271 if (eigs[j] == 0.0) { 272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 273 } else { 274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 275 } 276 } 277 } 278 } else { 279 /* TODO */ 280 } 281 } 282 maxneigs = PetscMax(B_neigs,maxneigs); 283 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 284 if (B_neigs) { 285 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 286 287 if (pcbddc->dbg_flag > 1) { 288 PetscInt ii; 289 for (ii=0;ii<B_neigs;ii++) { 290 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 291 for (j=0;j<B_N;j++) { 292 #if defined(PETSC_USE_COMPLEX) 293 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 294 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 295 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 296 #else 297 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 298 #endif 299 } 300 } 301 } 302 #if 0 303 for (j=0;j<B_neigs;j++) { 304 PetscBLASInt Blas_N,Blas_one = 1.0; 305 PetscScalar norm; 306 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 307 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 308 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 309 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 310 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 311 } else { 312 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 313 } 314 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 315 } 316 #endif 317 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 318 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 319 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 320 cum++; 321 } 322 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 323 /* shift for next computation */ 324 cumarray += subset_size*subset_size; 325 } 326 if (pcbddc->dbg_flag) { 327 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 328 } 329 330 if (mss) { 331 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 332 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 333 /* destroy matrices (junk) */ 334 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 335 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 336 } 337 if (allocated_S_St) { 338 ierr = PetscFree2(S,St);CHKERRQ(ierr); 339 } 340 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 341 #if defined(PETSC_USE_COMPLEX) 342 ierr = PetscFree(rwork);CHKERRQ(ierr); 343 #endif 344 if (pcbddc->dbg_flag) { 345 PetscInt maxneigs_r; 346 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 347 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 348 } 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "PCBDDCSetUpSolvers" 354 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 355 { 356 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 357 PetscScalar *coarse_submat_vals; 358 PetscErrorCode ierr; 359 360 PetscFunctionBegin; 361 /* Setup local scatters R_to_B and (optionally) R_to_D */ 362 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 363 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 364 365 /* Setup local neumann solver ksp_R */ 366 /* PCBDDCSetUpLocalScatters should be called first! */ 367 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 368 369 /* Change global null space passed in by the user if change of basis has been requested */ 370 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 371 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 372 } 373 374 /* 375 Setup local correction and local part of coarse basis. 376 Gives back the dense local part of the coarse matrix in column major ordering 377 */ 378 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 379 380 /* Compute total number of coarse nodes and setup coarse solver */ 381 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 382 383 /* free */ 384 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCResetCustomization" 390 PetscErrorCode PCBDDCResetCustomization(PC pc) 391 { 392 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 397 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 398 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 399 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 401 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 402 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 403 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 405 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 406 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "PCBDDCResetTopography" 412 PetscErrorCode PCBDDCResetTopography(PC pc) 413 { 414 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 419 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 421 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 422 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "PCBDDCResetSolvers" 428 PetscErrorCode PCBDDCResetSolvers(PC pc) 429 { 430 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 431 PetscScalar *array; 432 PetscErrorCode ierr; 433 434 PetscFunctionBegin; 435 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 436 if (pcbddc->coarse_phi_B) { 437 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 438 ierr = PetscFree(array);CHKERRQ(ierr); 439 } 440 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 442 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 443 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 444 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 445 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 447 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 449 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 450 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 451 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 452 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 453 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 454 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 455 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 456 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 457 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 458 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 459 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 460 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 461 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 462 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 468 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 469 { 470 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 471 PC_IS *pcis = (PC_IS*)pc->data; 472 VecType impVecType; 473 PetscInt n_constraints,n_R,old_size; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 if (!pcbddc->ConstraintMatrix) { 478 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 479 } 480 /* get sizes */ 481 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 482 n_R = pcis->n-pcbddc->n_vertices; 483 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 484 /* local work vectors (try to avoid unneeded work)*/ 485 /* R nodes */ 486 old_size = -1; 487 if (pcbddc->vec1_R) { 488 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 489 } 490 if (n_R != old_size) { 491 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 492 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 493 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 494 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 495 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 496 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 497 } 498 /* local primal dofs */ 499 old_size = -1; 500 if (pcbddc->vec1_P) { 501 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 502 } 503 if (pcbddc->local_primal_size != old_size) { 504 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 505 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 506 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 507 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 508 } 509 /* local explicit constraints */ 510 old_size = -1; 511 if (pcbddc->vec1_C) { 512 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 513 } 514 if (n_constraints && n_constraints != old_size) { 515 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 516 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 517 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 518 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 519 } 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "PCBDDCSetUpCorrection" 525 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 526 { 527 PetscErrorCode ierr; 528 /* pointers to pcis and pcbddc */ 529 PC_IS* pcis = (PC_IS*)pc->data; 530 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 531 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 532 /* submatrices of local problem */ 533 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 534 /* submatrices of local coarse problem */ 535 Mat S_VV,S_CV,S_VC,S_CC; 536 /* working matrices */ 537 Mat C_CR; 538 /* additional working stuff */ 539 PC pc_R; 540 Mat F; 541 PetscBool isLU,isCHOL,isILU; 542 543 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 544 PetscScalar *work; 545 PetscInt *idx_V_B; 546 PetscInt n,n_vertices,n_constraints; 547 PetscInt i,n_R,n_D,n_B; 548 PetscBool unsymmetric_check; 549 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 550 MatType impMatType; 551 /* some shortcuts to scalars */ 552 PetscScalar one=1.0,m_one=-1.0; 553 554 PetscFunctionBegin; 555 n_vertices = pcbddc->n_vertices; 556 n_constraints = pcbddc->local_primal_size-n_vertices; 557 /* Set Non-overlapping dimensions */ 558 n_B = pcis->n_B; 559 n_D = pcis->n - n_B; 560 n_R = pcis->n - n_vertices; 561 562 /* Set types for local objects needed by BDDC precondtioner */ 563 impMatType = MATSEQDENSE; 564 565 /* vertices in boundary numbering */ 566 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 567 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 568 if (i != n_vertices) { 569 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 570 } 571 572 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 573 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 575 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 576 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 577 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 578 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 579 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 581 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 582 583 unsymmetric_check = PETSC_FALSE; 584 /* allocate workspace */ 585 n = 0; 586 if (n_constraints) { 587 n += n_R*n_constraints; 588 } 589 if (n_vertices) { 590 n = PetscMax(2*n_R*n_vertices,n); 591 n = PetscMax((n_R+n_B)*n_vertices,n); 592 } 593 if (!pcbddc->symmetric_primal) { 594 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 595 unsymmetric_check = PETSC_TRUE; 596 } 597 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 598 599 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 600 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 601 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 602 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 603 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 604 if (isLU || isILU || isCHOL) { 605 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 606 } else if (sub_schurs->reuse_mumps) { 607 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 608 MatFactorType type; 609 610 F = reuse_mumps->F; 611 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 612 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 613 } else { 614 F = NULL; 615 } 616 617 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 618 if (n_constraints) { 619 Mat M1,M2,M3; 620 Mat auxmat; 621 IS is_aux; 622 PetscScalar *array,*array2; 623 624 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 625 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 626 627 /* Extract constraints on R nodes: C_{CR} */ 628 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 629 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 630 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 631 632 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 633 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 634 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 635 for (i=0;i<n_constraints;i++) { 636 const PetscScalar *row_cmat_values; 637 const PetscInt *row_cmat_indices; 638 PetscInt size_of_constraint,j; 639 640 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 641 for (j=0;j<size_of_constraint;j++) { 642 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 643 } 644 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 645 } 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 647 if (F) { 648 Mat B; 649 650 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 651 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 652 ierr = MatDestroy(&B);CHKERRQ(ierr); 653 } else { 654 PetscScalar *marr; 655 656 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 660 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 661 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 662 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 663 } 664 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 665 } 666 if (!pcbddc->switch_static) { 667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 668 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 669 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 670 for (i=0;i<n_constraints;i++) { 671 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 672 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 673 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 674 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 675 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 676 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 677 } 678 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 679 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 680 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 681 } else { 682 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 683 pcbddc->local_auxmat2 = local_auxmat2_R; 684 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 685 } 686 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 687 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 688 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 689 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 690 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 691 if (isCHOL) { 692 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 693 } else { 694 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 695 } 696 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 697 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 698 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 699 ierr = MatDestroy(&M2);CHKERRQ(ierr); 700 ierr = MatDestroy(&M3);CHKERRQ(ierr); 701 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 702 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 703 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 704 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 705 ierr = MatDestroy(&M1);CHKERRQ(ierr); 706 } 707 /* Get submatrices from subdomain matrix */ 708 if (n_vertices) { 709 IS is_aux; 710 711 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 712 IS tis; 713 714 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 715 ierr = ISSort(tis);CHKERRQ(ierr); 716 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 717 ierr = ISDestroy(&tis);CHKERRQ(ierr); 718 } else { 719 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 720 } 721 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 722 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 723 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 724 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 725 } 726 727 /* Matrix of coarse basis functions (local) */ 728 if (pcbddc->coarse_phi_B) { 729 PetscInt on_B,on_primal,on_D=n_D; 730 if (pcbddc->coarse_phi_D) { 731 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 732 } 733 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 734 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 735 PetscScalar *marray; 736 737 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 738 ierr = PetscFree(marray);CHKERRQ(ierr); 739 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 740 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 741 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 743 } 744 } 745 746 if (!pcbddc->coarse_phi_B) { 747 PetscScalar *marray; 748 749 n = n_B*pcbddc->local_primal_size; 750 if (pcbddc->switch_static || pcbddc->dbg_flag) { 751 n += n_D*pcbddc->local_primal_size; 752 } 753 if (!pcbddc->symmetric_primal) { 754 n *= 2; 755 } 756 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 757 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 758 n = n_B*pcbddc->local_primal_size; 759 if (pcbddc->switch_static || pcbddc->dbg_flag) { 760 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 761 n += n_D*pcbddc->local_primal_size; 762 } 763 if (!pcbddc->symmetric_primal) { 764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 765 if (pcbddc->switch_static || pcbddc->dbg_flag) { 766 n = n_B*pcbddc->local_primal_size; 767 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 768 } 769 } else { 770 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 771 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 772 if (pcbddc->switch_static || pcbddc->dbg_flag) { 773 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 774 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 775 } 776 } 777 } 778 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 779 /* vertices */ 780 if (n_vertices) { 781 782 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 783 784 if (n_R) { 785 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 786 PetscBLASInt B_N,B_one = 1; 787 PetscScalar *x,*y; 788 PetscBool isseqaij; 789 790 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 791 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 792 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 793 if (F) { /* TODO could be optimized for symmetric problems */ 794 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 795 } else { 796 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 797 for (i=0;i<n_vertices;i++) { 798 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 799 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 800 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 801 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 802 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 803 } 804 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 805 } 806 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 807 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 808 if (n_constraints) { 809 Mat B; 810 811 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 812 for (i=0;i<n_vertices;i++) { 813 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 814 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 815 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 816 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 817 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 818 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 819 } 820 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 821 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 822 ierr = MatDestroy(&B);CHKERRQ(ierr); 823 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 824 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 825 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 826 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 827 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 828 ierr = MatDestroy(&B);CHKERRQ(ierr); 829 } 830 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 831 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 832 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 833 } 834 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 835 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 836 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 837 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 838 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 839 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 840 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 841 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 842 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 843 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 844 } else { 845 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 846 } 847 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 848 /* coarse basis functions */ 849 for (i=0;i<n_vertices;i++) { 850 PetscScalar *y; 851 852 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 853 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 854 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 855 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 857 y[n_B*i+idx_V_B[i]] = 1.0; 858 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 859 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 860 861 if (pcbddc->switch_static || pcbddc->dbg_flag) { 862 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 863 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 864 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 866 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 867 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 868 } 869 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 870 } 871 /* if n_R == 0 the object is not destroyed */ 872 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 873 } 874 875 if (n_constraints) { 876 Mat B; 877 878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 879 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 880 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 881 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 882 if (n_vertices) { 883 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 884 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 885 } else { 886 Mat S_VCt; 887 888 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 889 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 890 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 891 } 892 } 893 ierr = MatDestroy(&B);CHKERRQ(ierr); 894 /* coarse basis functions */ 895 for (i=0;i<n_constraints;i++) { 896 PetscScalar *y; 897 898 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 899 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 900 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 901 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 902 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 904 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 905 if (pcbddc->switch_static || pcbddc->dbg_flag) { 906 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 907 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 908 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 910 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 911 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 912 } 913 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 914 } 915 } 916 if (n_constraints) { 917 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 918 } 919 920 /* compute other basis functions for non-symmetric problems */ 921 if (!pcbddc->symmetric_primal) { 922 923 if (n_constraints) { 924 Mat S_CCT,B_C; 925 926 /* this is a lazy thing */ 927 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 928 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 929 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 930 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 931 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 932 if (n_vertices) { 933 Mat B_V,S_VCT; 934 935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 936 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 937 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 938 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 939 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 940 } 941 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 942 } else { /* if there are no constraints, reset work */ 943 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 944 } 945 if (n_vertices && n_R) { 946 Mat A_VRT; 947 PetscScalar *marray; 948 PetscBLASInt B_N,B_one = 1; 949 950 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 951 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 952 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 953 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 954 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 955 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 956 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 957 } 958 959 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 960 for (i=0;i<pcbddc->local_primal_size;i++) { 961 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 962 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 963 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 964 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 965 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 966 } 967 } else { 968 for (i=0;i<pcbddc->local_primal_size;i++) { 969 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 970 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 971 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 972 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 973 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 974 } 975 } 976 /* coarse basis functions */ 977 for (i=0;i<pcbddc->local_primal_size;i++) { 978 PetscScalar *y; 979 980 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 981 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 982 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 983 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985 if (i<n_vertices) { 986 y[n_B*i+idx_V_B[i]] = 1.0; 987 } 988 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 989 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 990 991 if (pcbddc->switch_static || pcbddc->dbg_flag) { 992 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 993 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 994 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 995 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 996 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 997 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 998 } 999 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1000 } 1001 } 1002 /* free memory */ 1003 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1004 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1005 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1006 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1007 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1008 ierr = PetscFree(work);CHKERRQ(ierr); 1009 if (n_vertices) { 1010 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1011 } 1012 if (n_constraints) { 1013 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1014 } 1015 /* Checking coarse_sub_mat and coarse basis functios */ 1016 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1017 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1018 if (pcbddc->dbg_flag) { 1019 Mat coarse_sub_mat; 1020 Mat AUXMAT,TM1,TM2,TM3,TM4; 1021 Mat coarse_phi_D,coarse_phi_B; 1022 Mat coarse_psi_D,coarse_psi_B; 1023 Mat A_II,A_BB,A_IB,A_BI; 1024 Mat C_B,CPHI; 1025 IS is_dummy; 1026 Vec mones; 1027 MatType checkmattype=MATSEQAIJ; 1028 PetscReal real_value; 1029 1030 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1031 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1032 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1033 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1034 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1035 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1036 if (unsymmetric_check) { 1037 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1038 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1039 } 1040 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1041 1042 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1043 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1044 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1045 if (unsymmetric_check) { 1046 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1047 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1048 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1049 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1050 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1051 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 } else { 1059 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1060 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1061 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1063 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1064 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1066 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1067 } 1068 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1069 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1070 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1071 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1072 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1073 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1074 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1075 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1076 1077 /* check constraints */ 1078 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1079 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1080 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1081 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1082 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1083 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1084 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1085 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1086 if (unsymmetric_check) { 1087 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1088 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1089 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1090 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1092 } 1093 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1094 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1095 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1096 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1097 1098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1099 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1100 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1101 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1102 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1103 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1104 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1105 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1106 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1107 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1108 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1109 if (unsymmetric_check) { 1110 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1111 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1112 } 1113 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1114 } 1115 /* get back data */ 1116 *coarse_submat_vals_n = coarse_submat_vals; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1122 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1123 { 1124 Mat *work_mat; 1125 IS isrow_s,iscol_s; 1126 PetscBool rsorted,csorted; 1127 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1132 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1133 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1134 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1135 1136 if (!rsorted) { 1137 const PetscInt *idxs; 1138 PetscInt *idxs_sorted,i; 1139 1140 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1142 for (i=0;i<rsize;i++) { 1143 idxs_perm_r[i] = i; 1144 } 1145 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1146 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1147 for (i=0;i<rsize;i++) { 1148 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1149 } 1150 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1151 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1152 } else { 1153 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1154 isrow_s = isrow; 1155 } 1156 1157 if (!csorted) { 1158 if (isrow == iscol) { 1159 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1160 iscol_s = isrow_s; 1161 } else { 1162 const PetscInt *idxs; 1163 PetscInt *idxs_sorted,i; 1164 1165 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1166 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1167 for (i=0;i<csize;i++) { 1168 idxs_perm_c[i] = i; 1169 } 1170 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1171 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1172 for (i=0;i<csize;i++) { 1173 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1174 } 1175 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1176 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1177 } 1178 } else { 1179 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1180 iscol_s = iscol; 1181 } 1182 1183 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1184 1185 if (!rsorted || !csorted) { 1186 Mat new_mat; 1187 IS is_perm_r,is_perm_c; 1188 1189 if (!rsorted) { 1190 PetscInt *idxs_r,i; 1191 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1192 for (i=0;i<rsize;i++) { 1193 idxs_r[idxs_perm_r[i]] = i; 1194 } 1195 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1196 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1197 } else { 1198 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1199 } 1200 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1201 1202 if (!csorted) { 1203 if (isrow_s == iscol_s) { 1204 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1205 is_perm_c = is_perm_r; 1206 } else { 1207 PetscInt *idxs_c,i; 1208 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1209 for (i=0;i<csize;i++) { 1210 idxs_c[idxs_perm_c[i]] = i; 1211 } 1212 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1213 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1214 } 1215 } else { 1216 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1217 } 1218 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1219 1220 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1221 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1222 work_mat[0] = new_mat; 1223 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1224 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1225 } 1226 1227 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1228 *B = work_mat[0]; 1229 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1230 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1231 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1237 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1238 { 1239 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1240 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1241 Mat new_mat; 1242 IS is_local,is_global; 1243 PetscInt local_size; 1244 PetscBool isseqaij; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1249 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1250 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1251 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1252 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1253 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1254 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1255 1256 /* check */ 1257 if (pcbddc->dbg_flag) { 1258 Vec x,x_change; 1259 PetscReal error; 1260 1261 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1262 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1263 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1264 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1267 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1269 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1270 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1271 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1272 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1273 ierr = VecDestroy(&x);CHKERRQ(ierr); 1274 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1275 } 1276 1277 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1278 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1279 if (isseqaij) { 1280 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1281 } else { 1282 Mat work_mat; 1283 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1284 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1285 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1286 } 1287 if (matis->A->symmetric_set) { 1288 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1289 #if !defined(PETSC_USE_COMPLEX) 1290 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1291 #endif 1292 } 1293 /* 1294 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1295 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1296 */ 1297 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1298 PetscFunctionReturn(0); 1299 } 1300 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1303 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1304 { 1305 PC_IS* pcis = (PC_IS*)(pc->data); 1306 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1307 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1308 PetscInt *idx_R_local=NULL; 1309 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1310 PetscInt vbs,bs; 1311 PetscBT bitmask=NULL; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 /* 1316 No need to setup local scatters if 1317 - primal space is unchanged 1318 AND 1319 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1320 AND 1321 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1322 */ 1323 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1324 PetscFunctionReturn(0); 1325 } 1326 /* destroy old objects */ 1327 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1328 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1329 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1330 /* Set Non-overlapping dimensions */ 1331 n_B = pcis->n_B; 1332 n_D = pcis->n - n_B; 1333 n_vertices = pcbddc->n_vertices; 1334 1335 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1336 1337 /* create auxiliary bitmask and allocate workspace */ 1338 if (!sub_schurs->reuse_mumps) { 1339 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1340 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1341 for (i=0;i<n_vertices;i++) { 1342 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1343 } 1344 1345 for (i=0, n_R=0; i<pcis->n; i++) { 1346 if (!PetscBTLookup(bitmask,i)) { 1347 idx_R_local[n_R++] = i; 1348 } 1349 } 1350 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1351 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1352 1353 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1354 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1355 } 1356 1357 /* Block code */ 1358 vbs = 1; 1359 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1360 if (bs>1 && !(n_vertices%bs)) { 1361 PetscBool is_blocked = PETSC_TRUE; 1362 PetscInt *vary; 1363 if (!sub_schurs->reuse_mumps) { 1364 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1365 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1366 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1367 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1368 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1369 for (i=0; i<pcis->n/bs; i++) { 1370 if (vary[i]!=0 && vary[i]!=bs) { 1371 is_blocked = PETSC_FALSE; 1372 break; 1373 } 1374 } 1375 ierr = PetscFree(vary);CHKERRQ(ierr); 1376 } else { 1377 /* Verify directly the R set */ 1378 for (i=0; i<n_R/bs; i++) { 1379 PetscInt j,node=idx_R_local[bs*i]; 1380 for (j=1; j<bs; j++) { 1381 if (node != idx_R_local[bs*i+j]-j) { 1382 is_blocked = PETSC_FALSE; 1383 break; 1384 } 1385 } 1386 } 1387 } 1388 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1389 vbs = bs; 1390 for (i=0;i<n_R/vbs;i++) { 1391 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1392 } 1393 } 1394 } 1395 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1396 if (sub_schurs->reuse_mumps) { 1397 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1398 1399 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1400 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1401 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1402 reuse_mumps->is_R = pcbddc->is_R_local; 1403 } else { 1404 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1405 } 1406 1407 /* print some info if requested */ 1408 if (pcbddc->dbg_flag) { 1409 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1410 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1411 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1414 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1415 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1416 } 1417 1418 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1419 if (!sub_schurs->reuse_mumps) { 1420 IS is_aux1,is_aux2; 1421 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1422 1423 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1424 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1425 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1426 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1427 for (i=0; i<n_D; i++) { 1428 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1429 } 1430 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1431 for (i=0, j=0; i<n_R; i++) { 1432 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1433 aux_array1[j++] = i; 1434 } 1435 } 1436 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1437 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1438 for (i=0, j=0; i<n_B; i++) { 1439 if (!PetscBTLookup(bitmask,is_indices[i])) { 1440 aux_array2[j++] = i; 1441 } 1442 } 1443 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1444 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1445 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1446 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1447 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1448 1449 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1450 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1451 for (i=0, j=0; i<n_R; i++) { 1452 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1453 aux_array1[j++] = i; 1454 } 1455 } 1456 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1457 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1458 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1459 } 1460 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1461 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1462 } else { 1463 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1464 IS tis; 1465 PetscInt schur_size; 1466 1467 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1468 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1469 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1470 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1471 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1472 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1473 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1474 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1475 } 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1483 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1484 { 1485 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1486 PC_IS *pcis = (PC_IS*)pc->data; 1487 PC pc_temp; 1488 Mat A_RR; 1489 MatReuse reuse; 1490 PetscScalar m_one = -1.0; 1491 PetscReal value; 1492 PetscInt n_D,n_R; 1493 PetscBool use_exact,use_exact_reduced,issbaij; 1494 PetscErrorCode ierr; 1495 /* prefixes stuff */ 1496 char dir_prefix[256],neu_prefix[256],str_level[16]; 1497 size_t len; 1498 1499 PetscFunctionBegin; 1500 1501 /* compute prefixes */ 1502 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1503 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1504 if (!pcbddc->current_level) { 1505 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1506 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1507 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1508 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1509 } else { 1510 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1511 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1512 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1513 len -= 15; /* remove "pc_bddc_coarse_" */ 1514 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1515 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1516 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1517 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1518 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1519 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1520 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1521 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1522 } 1523 1524 /* DIRICHLET PROBLEM */ 1525 if (dirichlet) { 1526 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1527 if (pcbddc->local_mat->symmetric_set) { 1528 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1529 } 1530 /* Matrix for Dirichlet problem is pcis->A_II */ 1531 n_D = pcis->n - pcis->n_B; 1532 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1533 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1534 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1535 /* default */ 1536 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1537 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1538 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1539 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1540 if (issbaij) { 1541 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1542 } else { 1543 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1544 } 1545 /* Allow user's customization */ 1546 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1547 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1548 } 1549 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1550 if (sub_schurs->reuse_mumps) { 1551 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1552 1553 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1554 } 1555 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1556 if (!n_D) { 1557 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1558 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1559 } 1560 /* Set Up KSP for Dirichlet problem of BDDC */ 1561 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1562 /* set ksp_D into pcis data */ 1563 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1564 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1565 pcis->ksp_D = pcbddc->ksp_D; 1566 } 1567 1568 /* NEUMANN PROBLEM */ 1569 A_RR = 0; 1570 if (neumann) { 1571 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1572 PetscInt ibs,mbs; 1573 PetscBool issbaij; 1574 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1575 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1576 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1577 if (pcbddc->ksp_R) { /* already created ksp */ 1578 PetscInt nn_R; 1579 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1580 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1581 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1582 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1583 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1584 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1585 reuse = MAT_INITIAL_MATRIX; 1586 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1587 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1588 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1589 reuse = MAT_INITIAL_MATRIX; 1590 } else { /* safe to reuse the matrix */ 1591 reuse = MAT_REUSE_MATRIX; 1592 } 1593 } 1594 /* last check */ 1595 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1596 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1597 reuse = MAT_INITIAL_MATRIX; 1598 } 1599 } else { /* first time, so we need to create the matrix */ 1600 reuse = MAT_INITIAL_MATRIX; 1601 } 1602 /* extract A_RR */ 1603 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1604 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1605 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1606 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1607 if (matis->A == pcbddc->local_mat) { 1608 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1609 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1610 } else { 1611 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1612 } 1613 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1614 if (matis->A == pcbddc->local_mat) { 1615 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1616 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1617 } else { 1618 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1619 } 1620 } 1621 if (!sub_schurs->reuse_mumps) { 1622 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1623 if (pcbddc->local_mat->symmetric_set) { 1624 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1625 } 1626 } else { 1627 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1628 1629 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1630 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1631 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1632 } 1633 if (!pcbddc->ksp_R) { /* create object if not present */ 1634 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1635 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1636 /* default */ 1637 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1638 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1639 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1640 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1641 if (issbaij) { 1642 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1643 } else { 1644 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1645 } 1646 /* Allow user's customization */ 1647 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1648 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1649 } 1650 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1651 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1652 if (!n_R) { 1653 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1654 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1655 } 1656 /* Reuse MUMPS solver if it is present */ 1657 if (sub_schurs->reuse_mumps) { 1658 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1659 1660 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1661 } 1662 /* Set Up KSP for Neumann problem of BDDC */ 1663 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1664 } 1665 /* free Neumann problem's matrix */ 1666 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1667 1668 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1669 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1670 if (pcbddc->dbg_flag) { 1671 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1672 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1673 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1674 } 1675 if (dirichlet) { /* Dirichlet */ 1676 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1677 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1678 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1679 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1680 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1681 /* need to be adapted? */ 1682 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1683 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1684 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1685 /* print info */ 1686 if (pcbddc->dbg_flag) { 1687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1688 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1689 } 1690 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1691 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1692 } 1693 } 1694 if (neumann) { /* Neumann */ 1695 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1696 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1697 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1698 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1699 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1700 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1701 /* need to be adapted? */ 1702 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1703 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1704 /* print info */ 1705 if (pcbddc->dbg_flag) { 1706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1707 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1708 } 1709 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1710 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1711 } 1712 } 1713 } 1714 PetscFunctionReturn(0); 1715 } 1716 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1719 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1720 { 1721 PetscErrorCode ierr; 1722 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1723 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1724 1725 PetscFunctionBegin; 1726 if (!sub_schurs->reuse_mumps) { 1727 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1728 } 1729 if (!pcbddc->switch_static) { 1730 if (applytranspose && pcbddc->local_auxmat1) { 1731 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1732 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1733 } 1734 if (!sub_schurs->reuse_mumps) { 1735 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1736 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1737 } else { 1738 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1739 1740 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1741 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1742 } 1743 } else { 1744 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1745 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1746 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1747 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1748 if (applytranspose && pcbddc->local_auxmat1) { 1749 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1750 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1751 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1752 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1753 } 1754 } 1755 if (!sub_schurs->reuse_mumps) { 1756 if (applytranspose) { 1757 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1758 } else { 1759 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1760 } 1761 #if defined(PETSC_HAVE_MUMPS) 1762 } else { 1763 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1764 1765 if (applytranspose) { 1766 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1767 } else { 1768 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1769 } 1770 #endif 1771 } 1772 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1773 if (!pcbddc->switch_static) { 1774 if (!sub_schurs->reuse_mumps) { 1775 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1776 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 } else { 1778 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1779 1780 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1781 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1782 } 1783 if (!applytranspose && pcbddc->local_auxmat1) { 1784 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1785 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1786 } 1787 } else { 1788 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1789 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1790 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1791 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1792 if (!applytranspose && pcbddc->local_auxmat1) { 1793 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1794 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1795 } 1796 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1797 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1799 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1807 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1808 { 1809 PetscErrorCode ierr; 1810 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1811 PC_IS* pcis = (PC_IS*) (pc->data); 1812 const PetscScalar zero = 0.0; 1813 1814 PetscFunctionBegin; 1815 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1816 if (applytranspose) { 1817 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1818 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1819 } else { 1820 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1821 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1822 } 1823 /* start communications from local primal nodes to rhs of coarse solver */ 1824 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1825 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1826 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1827 1828 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1829 /* TODO remove null space when doing multilevel */ 1830 if (pcbddc->coarse_ksp) { 1831 Vec rhs,sol; 1832 1833 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1834 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1835 if (applytranspose) { 1836 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1837 } else { 1838 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1839 } 1840 } 1841 1842 /* Local solution on R nodes */ 1843 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1844 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1845 } 1846 1847 /* communications from coarse sol to local primal nodes */ 1848 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1849 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1850 1851 /* Sum contributions from two levels */ 1852 if (applytranspose) { 1853 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1854 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1855 } else { 1856 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1857 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1858 } 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1864 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1865 { 1866 PetscErrorCode ierr; 1867 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1868 PetscScalar *array; 1869 Vec from,to; 1870 1871 PetscFunctionBegin; 1872 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1873 from = pcbddc->coarse_vec; 1874 to = pcbddc->vec1_P; 1875 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1876 Vec tvec; 1877 1878 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1879 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1880 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1881 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1882 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1883 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1884 } 1885 } else { /* from local to global -> put data in coarse right hand side */ 1886 from = pcbddc->vec1_P; 1887 to = pcbddc->coarse_vec; 1888 } 1889 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1895 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1896 { 1897 PetscErrorCode ierr; 1898 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1899 PetscScalar *array; 1900 Vec from,to; 1901 1902 PetscFunctionBegin; 1903 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1904 from = pcbddc->coarse_vec; 1905 to = pcbddc->vec1_P; 1906 } else { /* from local to global -> put data in coarse right hand side */ 1907 from = pcbddc->vec1_P; 1908 to = pcbddc->coarse_vec; 1909 } 1910 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1911 if (smode == SCATTER_FORWARD) { 1912 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1913 Vec tvec; 1914 1915 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1916 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1917 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1918 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1919 } 1920 } else { 1921 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1922 ierr = VecResetArray(from);CHKERRQ(ierr); 1923 } 1924 } 1925 PetscFunctionReturn(0); 1926 } 1927 1928 /* uncomment for testing purposes */ 1929 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1932 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1933 { 1934 PetscErrorCode ierr; 1935 PC_IS* pcis = (PC_IS*)(pc->data); 1936 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1937 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1938 /* one and zero */ 1939 PetscScalar one=1.0,zero=0.0; 1940 /* space to store constraints and their local indices */ 1941 PetscScalar *constraints_data; 1942 PetscInt *constraints_idxs,*constraints_idxs_B; 1943 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1944 PetscInt *constraints_n; 1945 /* iterators */ 1946 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1947 /* BLAS integers */ 1948 PetscBLASInt lwork,lierr; 1949 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1950 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1951 /* reuse */ 1952 PetscInt olocal_primal_size,olocal_primal_size_cc; 1953 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1954 /* change of basis */ 1955 PetscBool qr_needed; 1956 PetscBT change_basis,qr_needed_idx; 1957 /* auxiliary stuff */ 1958 PetscInt *nnz,*is_indices; 1959 PetscInt ncc; 1960 /* some quantities */ 1961 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1962 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1963 1964 PetscFunctionBegin; 1965 /* Destroy Mat objects computed previously */ 1966 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1967 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1968 /* save info on constraints from previous setup (if any) */ 1969 olocal_primal_size = pcbddc->local_primal_size; 1970 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1971 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1972 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1973 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1974 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1975 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1976 1977 /* print some info */ 1978 if (pcbddc->dbg_flag) { 1979 IS vertices; 1980 PetscInt nv,nedges,nfaces; 1981 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1982 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1983 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1984 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1985 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1986 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1987 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1990 } 1991 1992 if (!pcbddc->adaptive_selection) { 1993 IS ISForVertices,*ISForFaces,*ISForEdges; 1994 MatNullSpace nearnullsp; 1995 const Vec *nearnullvecs; 1996 Vec *localnearnullsp; 1997 PetscScalar *array; 1998 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1999 PetscBool nnsp_has_cnst; 2000 /* LAPACK working arrays for SVD or POD */ 2001 PetscBool skip_lapack,boolforchange; 2002 PetscScalar *work; 2003 PetscReal *singular_vals; 2004 #if defined(PETSC_USE_COMPLEX) 2005 PetscReal *rwork; 2006 #endif 2007 #if defined(PETSC_MISSING_LAPACK_GESVD) 2008 PetscScalar *temp_basis,*correlation_mat; 2009 #else 2010 PetscBLASInt dummy_int=1; 2011 PetscScalar dummy_scalar=1.; 2012 #endif 2013 2014 /* Get index sets for faces, edges and vertices from graph */ 2015 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2016 /* free unneeded index sets */ 2017 if (!pcbddc->use_vertices) { 2018 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2019 } 2020 if (!pcbddc->use_edges) { 2021 for (i=0;i<n_ISForEdges;i++) { 2022 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2023 } 2024 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2025 n_ISForEdges = 0; 2026 } 2027 if (!pcbddc->use_faces) { 2028 for (i=0;i<n_ISForFaces;i++) { 2029 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2030 } 2031 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2032 n_ISForFaces = 0; 2033 } 2034 2035 #if defined(PETSC_USE_DEBUG) 2036 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2037 Also use_change_of_basis should be consistent among processors */ 2038 if (pcbddc->NullSpace) { 2039 PetscBool tbool[2],gbool[2]; 2040 2041 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2042 pcbddc->use_change_of_basis = PETSC_TRUE; 2043 if (!ISForEdges) { 2044 pcbddc->use_change_on_faces = PETSC_TRUE; 2045 } 2046 } 2047 tbool[0] = pcbddc->use_change_of_basis; 2048 tbool[1] = pcbddc->use_change_on_faces; 2049 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2050 pcbddc->use_change_of_basis = gbool[0]; 2051 pcbddc->use_change_on_faces = gbool[1]; 2052 } 2053 #endif 2054 2055 /* check if near null space is attached to global mat */ 2056 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2057 if (nearnullsp) { 2058 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2059 /* remove any stored info */ 2060 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2061 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2062 /* store information for BDDC solver reuse */ 2063 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2064 pcbddc->onearnullspace = nearnullsp; 2065 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2066 for (i=0;i<nnsp_size;i++) { 2067 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2068 } 2069 } else { /* if near null space is not provided BDDC uses constants by default */ 2070 nnsp_size = 0; 2071 nnsp_has_cnst = PETSC_TRUE; 2072 } 2073 /* get max number of constraints on a single cc */ 2074 max_constraints = nnsp_size; 2075 if (nnsp_has_cnst) max_constraints++; 2076 2077 /* 2078 Evaluate maximum storage size needed by the procedure 2079 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2080 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2081 There can be multiple constraints per connected component 2082 */ 2083 n_vertices = 0; 2084 if (ISForVertices) { 2085 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2086 } 2087 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2088 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2089 2090 total_counts = n_ISForFaces+n_ISForEdges; 2091 total_counts *= max_constraints; 2092 total_counts += n_vertices; 2093 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2094 2095 total_counts = 0; 2096 max_size_of_constraint = 0; 2097 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2098 IS used_is; 2099 if (i<n_ISForEdges) { 2100 used_is = ISForEdges[i]; 2101 } else { 2102 used_is = ISForFaces[i-n_ISForEdges]; 2103 } 2104 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2105 total_counts += j; 2106 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2107 } 2108 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2109 2110 /* get local part of global near null space vectors */ 2111 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2112 for (k=0;k<nnsp_size;k++) { 2113 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2114 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 } 2117 2118 /* whether or not to skip lapack calls */ 2119 skip_lapack = PETSC_TRUE; 2120 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2121 2122 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2123 if (!skip_lapack) { 2124 PetscScalar temp_work; 2125 2126 #if defined(PETSC_MISSING_LAPACK_GESVD) 2127 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2128 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2129 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2130 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2131 #if defined(PETSC_USE_COMPLEX) 2132 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2133 #endif 2134 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2135 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2136 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2137 lwork = -1; 2138 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2139 #if !defined(PETSC_USE_COMPLEX) 2140 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2141 #else 2142 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2143 #endif 2144 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2145 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2146 #else /* on missing GESVD */ 2147 /* SVD */ 2148 PetscInt max_n,min_n; 2149 max_n = max_size_of_constraint; 2150 min_n = max_constraints; 2151 if (max_size_of_constraint < max_constraints) { 2152 min_n = max_size_of_constraint; 2153 max_n = max_constraints; 2154 } 2155 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2156 #if defined(PETSC_USE_COMPLEX) 2157 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2158 #endif 2159 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2160 lwork = -1; 2161 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2162 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2163 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2164 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2165 #if !defined(PETSC_USE_COMPLEX) 2166 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2167 #else 2168 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2169 #endif 2170 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2171 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2172 #endif /* on missing GESVD */ 2173 /* Allocate optimal workspace */ 2174 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2175 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2176 } 2177 /* Now we can loop on constraining sets */ 2178 total_counts = 0; 2179 constraints_idxs_ptr[0] = 0; 2180 constraints_data_ptr[0] = 0; 2181 /* vertices */ 2182 if (n_vertices) { 2183 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2184 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2185 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2186 for (i=0;i<n_vertices;i++) { 2187 constraints_n[total_counts] = 1; 2188 constraints_data[total_counts] = 1.0; 2189 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2190 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2191 total_counts++; 2192 } 2193 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2194 PetscBool used_vertex; 2195 for (i=0;i<n_vertices;i++) { 2196 used_vertex = PETSC_FALSE; 2197 k = 0; 2198 while (!used_vertex && k<nnsp_size) { 2199 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2200 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2201 constraints_n[total_counts] = 1; 2202 constraints_idxs[total_counts] = is_indices[i]; 2203 constraints_data[total_counts] = 1.0; 2204 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2205 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2206 total_counts++; 2207 used_vertex = PETSC_TRUE; 2208 } 2209 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2210 k++; 2211 } 2212 } 2213 } 2214 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2215 n_vertices = total_counts; 2216 } 2217 2218 /* edges and faces */ 2219 total_counts_cc = total_counts; 2220 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2221 IS used_is; 2222 PetscBool idxs_copied = PETSC_FALSE; 2223 2224 if (ncc<n_ISForEdges) { 2225 used_is = ISForEdges[ncc]; 2226 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2227 } else { 2228 used_is = ISForFaces[ncc-n_ISForEdges]; 2229 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2230 } 2231 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2232 2233 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2234 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2235 /* change of basis should not be performed on local periodic nodes */ 2236 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2237 if (nnsp_has_cnst) { 2238 PetscScalar quad_value; 2239 2240 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2241 idxs_copied = PETSC_TRUE; 2242 2243 if (!pcbddc->use_nnsp_true) { 2244 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2245 } else { 2246 quad_value = 1.0; 2247 } 2248 for (j=0;j<size_of_constraint;j++) { 2249 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2250 } 2251 temp_constraints++; 2252 total_counts++; 2253 } 2254 for (k=0;k<nnsp_size;k++) { 2255 PetscReal real_value; 2256 PetscScalar *ptr_to_data; 2257 2258 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2259 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2260 for (j=0;j<size_of_constraint;j++) { 2261 ptr_to_data[j] = array[is_indices[j]]; 2262 } 2263 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2264 /* check if array is null on the connected component */ 2265 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2266 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2267 if (real_value > 0.0) { /* keep indices and values */ 2268 temp_constraints++; 2269 total_counts++; 2270 if (!idxs_copied) { 2271 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2272 idxs_copied = PETSC_TRUE; 2273 } 2274 } 2275 } 2276 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2277 valid_constraints = temp_constraints; 2278 if (!pcbddc->use_nnsp_true && temp_constraints) { 2279 if (temp_constraints == 1) { /* just normalize the constraint */ 2280 PetscScalar norm,*ptr_to_data; 2281 2282 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2283 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2284 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2285 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2286 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2287 } else { /* perform SVD */ 2288 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2289 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2290 2291 #if defined(PETSC_MISSING_LAPACK_GESVD) 2292 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2293 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2294 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2295 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2296 from that computed using LAPACKgesvd 2297 -> This is due to a different computation of eigenvectors in LAPACKheev 2298 -> The quality of the POD-computed basis will be the same */ 2299 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2300 /* Store upper triangular part of correlation matrix */ 2301 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2302 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2303 for (j=0;j<temp_constraints;j++) { 2304 for (k=0;k<j+1;k++) { 2305 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2306 } 2307 } 2308 /* compute eigenvalues and eigenvectors of correlation matrix */ 2309 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2310 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2311 #if !defined(PETSC_USE_COMPLEX) 2312 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2313 #else 2314 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2315 #endif 2316 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2317 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2318 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2319 j = 0; 2320 while (j < temp_constraints && singular_vals[j] < tol) j++; 2321 total_counts = total_counts-j; 2322 valid_constraints = temp_constraints-j; 2323 /* scale and copy POD basis into used quadrature memory */ 2324 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2325 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2326 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2327 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2330 if (j<temp_constraints) { 2331 PetscInt ii; 2332 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2333 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2334 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2335 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2336 for (k=0;k<temp_constraints-j;k++) { 2337 for (ii=0;ii<size_of_constraint;ii++) { 2338 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2339 } 2340 } 2341 } 2342 #else /* on missing GESVD */ 2343 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2344 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2345 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2346 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2347 #if !defined(PETSC_USE_COMPLEX) 2348 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2349 #else 2350 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2351 #endif 2352 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2353 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2354 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2355 k = temp_constraints; 2356 if (k > size_of_constraint) k = size_of_constraint; 2357 j = 0; 2358 while (j < k && singular_vals[k-j-1] < tol) j++; 2359 valid_constraints = k-j; 2360 total_counts = total_counts-temp_constraints+valid_constraints; 2361 #endif /* on missing GESVD */ 2362 } 2363 } 2364 /* update pointers information */ 2365 if (valid_constraints) { 2366 constraints_n[total_counts_cc] = valid_constraints; 2367 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2368 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2369 /* set change_of_basis flag */ 2370 if (boolforchange) { 2371 PetscBTSet(change_basis,total_counts_cc); 2372 } 2373 total_counts_cc++; 2374 } 2375 } 2376 /* free workspace */ 2377 if (!skip_lapack) { 2378 ierr = PetscFree(work);CHKERRQ(ierr); 2379 #if defined(PETSC_USE_COMPLEX) 2380 ierr = PetscFree(rwork);CHKERRQ(ierr); 2381 #endif 2382 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2383 #if defined(PETSC_MISSING_LAPACK_GESVD) 2384 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2385 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2386 #endif 2387 } 2388 for (k=0;k<nnsp_size;k++) { 2389 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2390 } 2391 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2392 /* free index sets of faces, edges and vertices */ 2393 for (i=0;i<n_ISForFaces;i++) { 2394 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2395 } 2396 if (n_ISForFaces) { 2397 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2398 } 2399 for (i=0;i<n_ISForEdges;i++) { 2400 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2401 } 2402 if (n_ISForEdges) { 2403 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2404 } 2405 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2406 } else { 2407 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2408 2409 total_counts = 0; 2410 n_vertices = 0; 2411 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2412 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2413 } 2414 max_constraints = 0; 2415 total_counts_cc = 0; 2416 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2417 total_counts += pcbddc->adaptive_constraints_n[i]; 2418 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2419 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2420 } 2421 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2422 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2423 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2424 constraints_data = pcbddc->adaptive_constraints_data; 2425 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2426 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2427 total_counts_cc = 0; 2428 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2429 if (pcbddc->adaptive_constraints_n[i]) { 2430 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2431 } 2432 } 2433 #if 0 2434 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2435 for (i=0;i<total_counts_cc;i++) { 2436 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2437 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2438 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2439 printf(" %d",constraints_idxs[j]); 2440 } 2441 printf("\n"); 2442 printf("number of cc: %d\n",constraints_n[i]); 2443 } 2444 for (i=0;i<n_vertices;i++) { 2445 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2446 } 2447 for (i=0;i<sub_schurs->n_subs;i++) { 2448 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2449 } 2450 #endif 2451 2452 max_size_of_constraint = 0; 2453 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2454 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2455 /* Change of basis */ 2456 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2457 if (pcbddc->use_change_of_basis) { 2458 for (i=0;i<sub_schurs->n_subs;i++) { 2459 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2460 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2461 } 2462 } 2463 } 2464 } 2465 pcbddc->local_primal_size = total_counts; 2466 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2467 2468 /* map constraints_idxs in boundary numbering */ 2469 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2470 if (i != constraints_idxs_ptr[total_counts_cc]) { 2471 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2472 } 2473 2474 /* Create constraint matrix */ 2475 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2476 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2477 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2478 2479 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2480 /* determine if a QR strategy is needed for change of basis */ 2481 qr_needed = PETSC_FALSE; 2482 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2483 total_primal_vertices=0; 2484 pcbddc->local_primal_size_cc = 0; 2485 for (i=0;i<total_counts_cc;i++) { 2486 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2487 if (size_of_constraint == 1) { 2488 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2489 pcbddc->local_primal_size_cc += 1; 2490 } else if (PetscBTLookup(change_basis,i)) { 2491 for (k=0;k<constraints_n[i];k++) { 2492 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2493 } 2494 pcbddc->local_primal_size_cc += constraints_n[i]; 2495 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2496 PetscBTSet(qr_needed_idx,i); 2497 qr_needed = PETSC_TRUE; 2498 } 2499 } else { 2500 pcbddc->local_primal_size_cc += 1; 2501 } 2502 } 2503 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2504 pcbddc->n_vertices = total_primal_vertices; 2505 /* permute indices in order to have a sorted set of vertices */ 2506 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2507 2508 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2509 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2510 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2511 2512 /* nonzero structure of constraint matrix */ 2513 /* and get reference dof for local constraints */ 2514 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2515 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2516 2517 j = total_primal_vertices; 2518 total_counts = total_primal_vertices; 2519 cum = total_primal_vertices; 2520 for (i=n_vertices;i<total_counts_cc;i++) { 2521 if (!PetscBTLookup(change_basis,i)) { 2522 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2523 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2524 cum++; 2525 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2526 for (k=0;k<constraints_n[i];k++) { 2527 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2528 nnz[j+k] = size_of_constraint; 2529 } 2530 j += constraints_n[i]; 2531 } 2532 } 2533 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2534 ierr = PetscFree(nnz);CHKERRQ(ierr); 2535 2536 /* set values in constraint matrix */ 2537 for (i=0;i<total_primal_vertices;i++) { 2538 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2539 } 2540 total_counts = total_primal_vertices; 2541 for (i=n_vertices;i<total_counts_cc;i++) { 2542 if (!PetscBTLookup(change_basis,i)) { 2543 PetscInt *cols; 2544 2545 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2546 cols = constraints_idxs+constraints_idxs_ptr[i]; 2547 for (k=0;k<constraints_n[i];k++) { 2548 PetscInt row = total_counts+k; 2549 PetscScalar *vals; 2550 2551 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2552 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2553 } 2554 total_counts += constraints_n[i]; 2555 } 2556 } 2557 /* assembling */ 2558 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2559 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2560 2561 /* 2562 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2563 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2564 */ 2565 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2566 if (pcbddc->use_change_of_basis) { 2567 /* dual and primal dofs on a single cc */ 2568 PetscInt dual_dofs,primal_dofs; 2569 /* working stuff for GEQRF */ 2570 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2571 PetscBLASInt lqr_work; 2572 /* working stuff for UNGQR */ 2573 PetscScalar *gqr_work,lgqr_work_t; 2574 PetscBLASInt lgqr_work; 2575 /* working stuff for TRTRS */ 2576 PetscScalar *trs_rhs; 2577 PetscBLASInt Blas_NRHS; 2578 /* pointers for values insertion into change of basis matrix */ 2579 PetscInt *start_rows,*start_cols; 2580 PetscScalar *start_vals; 2581 /* working stuff for values insertion */ 2582 PetscBT is_primal; 2583 PetscInt *aux_primal_numbering_B; 2584 /* matrix sizes */ 2585 PetscInt global_size,local_size; 2586 /* temporary change of basis */ 2587 Mat localChangeOfBasisMatrix; 2588 /* extra space for debugging */ 2589 PetscScalar *dbg_work; 2590 2591 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2592 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2593 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2594 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2595 /* nonzeros for local mat */ 2596 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2597 for (i=0;i<pcis->n;i++) nnz[i]=1; 2598 for (i=n_vertices;i<total_counts_cc;i++) { 2599 if (PetscBTLookup(change_basis,i)) { 2600 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2601 if (PetscBTLookup(qr_needed_idx,i)) { 2602 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2603 } else { 2604 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2605 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2606 } 2607 } 2608 } 2609 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2610 ierr = PetscFree(nnz);CHKERRQ(ierr); 2611 /* Set initial identity in the matrix */ 2612 for (i=0;i<pcis->n;i++) { 2613 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2614 } 2615 2616 if (pcbddc->dbg_flag) { 2617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2619 } 2620 2621 2622 /* Now we loop on the constraints which need a change of basis */ 2623 /* 2624 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2625 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2626 2627 Basic blocks of change of basis matrix T computed by 2628 2629 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2630 2631 | 1 0 ... 0 s_1/S | 2632 | 0 1 ... 0 s_2/S | 2633 | ... | 2634 | 0 ... 1 s_{n-1}/S | 2635 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2636 2637 with S = \sum_{i=1}^n s_i^2 2638 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2639 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2640 2641 - QR decomposition of constraints otherwise 2642 */ 2643 if (qr_needed) { 2644 /* space to store Q */ 2645 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2646 /* first we issue queries for optimal work */ 2647 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2648 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2649 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2650 lqr_work = -1; 2651 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2652 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2653 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2654 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2655 lgqr_work = -1; 2656 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2657 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2658 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2659 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2660 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2661 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2662 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2663 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2664 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2665 /* array to store scaling factors for reflectors */ 2666 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2667 /* array to store rhs and solution of triangular solver */ 2668 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2669 /* allocating workspace for check */ 2670 if (pcbddc->dbg_flag) { 2671 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2672 } 2673 } 2674 /* array to store whether a node is primal or not */ 2675 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2676 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2677 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2678 if (i != total_primal_vertices) { 2679 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2680 } 2681 for (i=0;i<total_primal_vertices;i++) { 2682 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2683 } 2684 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2685 2686 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2687 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2688 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2689 if (PetscBTLookup(change_basis,total_counts)) { 2690 /* get constraint info */ 2691 primal_dofs = constraints_n[total_counts]; 2692 dual_dofs = size_of_constraint-primal_dofs; 2693 2694 if (pcbddc->dbg_flag) { 2695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2696 } 2697 2698 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2699 2700 /* copy quadrature constraints for change of basis check */ 2701 if (pcbddc->dbg_flag) { 2702 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2703 } 2704 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2705 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2706 2707 /* compute QR decomposition of constraints */ 2708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2709 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2711 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2712 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2713 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2714 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2715 2716 /* explictly compute R^-T */ 2717 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2718 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2719 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2720 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2721 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2723 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2724 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2725 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2726 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2727 2728 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2729 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2733 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2734 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2735 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2736 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2737 2738 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2739 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2740 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2741 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2742 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2743 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2744 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2745 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2747 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2748 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2749 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2750 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2751 2752 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2753 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2754 /* insert cols for primal dofs */ 2755 for (j=0;j<primal_dofs;j++) { 2756 start_vals = &qr_basis[j*size_of_constraint]; 2757 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2758 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2759 } 2760 /* insert cols for dual dofs */ 2761 for (j=0,k=0;j<dual_dofs;k++) { 2762 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2763 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2764 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2765 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2766 j++; 2767 } 2768 } 2769 2770 /* check change of basis */ 2771 if (pcbddc->dbg_flag) { 2772 PetscInt ii,jj; 2773 PetscBool valid_qr=PETSC_TRUE; 2774 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2775 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2776 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2777 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2778 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2780 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2781 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2782 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2783 for (jj=0;jj<size_of_constraint;jj++) { 2784 for (ii=0;ii<primal_dofs;ii++) { 2785 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2786 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2787 } 2788 } 2789 if (!valid_qr) { 2790 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2791 for (jj=0;jj<size_of_constraint;jj++) { 2792 for (ii=0;ii<primal_dofs;ii++) { 2793 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2794 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2795 } 2796 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2797 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2798 } 2799 } 2800 } 2801 } else { 2802 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2803 } 2804 } 2805 } else { /* simple transformation block */ 2806 PetscInt row,col; 2807 PetscScalar val,norm; 2808 2809 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2810 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2811 for (j=0;j<size_of_constraint;j++) { 2812 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2813 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2814 if (!PetscBTLookup(is_primal,row_B)) { 2815 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2816 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2817 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2818 } else { 2819 for (k=0;k<size_of_constraint;k++) { 2820 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2821 if (row != col) { 2822 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2823 } else { 2824 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2825 } 2826 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2827 } 2828 } 2829 } 2830 if (pcbddc->dbg_flag) { 2831 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2832 } 2833 } 2834 } else { 2835 if (pcbddc->dbg_flag) { 2836 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2837 } 2838 } 2839 } 2840 2841 /* free workspace */ 2842 if (qr_needed) { 2843 if (pcbddc->dbg_flag) { 2844 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2845 } 2846 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2847 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2848 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2849 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2850 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2851 } 2852 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2853 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 2856 /* assembling of global change of variable */ 2857 { 2858 Mat tmat; 2859 PetscInt bs; 2860 2861 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2862 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2863 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2864 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2865 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2866 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2867 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2868 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2869 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2870 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2871 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2872 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2873 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2874 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2875 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2876 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2877 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2878 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2879 } 2880 /* check */ 2881 if (pcbddc->dbg_flag) { 2882 PetscReal error; 2883 Vec x,x_change; 2884 2885 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2886 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2887 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2888 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2889 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2890 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2891 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2892 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2893 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2894 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2895 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2896 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2897 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2898 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2899 ierr = VecDestroy(&x);CHKERRQ(ierr); 2900 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2901 } 2902 2903 /* adapt sub_schurs computed (if any) */ 2904 if (pcbddc->use_deluxe_scaling) { 2905 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2906 if (sub_schurs->S_Ej_all) { 2907 Mat S_new,tmat; 2908 IS is_all_N; 2909 2910 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2911 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2912 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2913 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2914 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2915 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2916 sub_schurs->S_Ej_all = S_new; 2917 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2918 if (sub_schurs->sum_S_Ej_all) { 2919 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2920 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2921 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2922 sub_schurs->sum_S_Ej_all = S_new; 2923 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2924 } 2925 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2926 } 2927 } 2928 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2929 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2930 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2931 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2932 } 2933 2934 /* set up change of basis context */ 2935 if (pcbddc->ChangeOfBasisMatrix) { 2936 PCBDDCChange_ctx change_ctx; 2937 2938 if (!pcbddc->new_global_mat) { 2939 PetscInt global_size,local_size; 2940 2941 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2942 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2943 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2944 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2945 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2946 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2947 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2948 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2949 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2950 } else { 2951 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2952 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2953 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2954 } 2955 if (!pcbddc->user_ChangeOfBasisMatrix) { 2956 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2957 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2958 } else { 2959 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2960 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2961 } 2962 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2963 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2964 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2965 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 } 2967 2968 /* check if a new primal space has been introduced */ 2969 pcbddc->new_primal_space_local = PETSC_TRUE; 2970 if (olocal_primal_size == pcbddc->local_primal_size) { 2971 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2972 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2973 if (!pcbddc->new_primal_space_local) { 2974 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2975 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2976 } 2977 } 2978 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2979 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2980 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2981 2982 /* flush dbg viewer */ 2983 if (pcbddc->dbg_flag) { 2984 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2985 } 2986 2987 /* free workspace */ 2988 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2989 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2990 if (!pcbddc->adaptive_selection) { 2991 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2992 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2993 } else { 2994 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2995 pcbddc->adaptive_constraints_idxs_ptr, 2996 pcbddc->adaptive_constraints_data_ptr, 2997 pcbddc->adaptive_constraints_idxs, 2998 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2999 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3000 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3001 } 3002 PetscFunctionReturn(0); 3003 } 3004 3005 #undef __FUNCT__ 3006 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3007 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3008 { 3009 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3010 PC_IS *pcis = (PC_IS*)pc->data; 3011 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3012 PetscInt ierr,i,vertex_size,N; 3013 PetscViewer viewer=pcbddc->dbg_viewer; 3014 3015 PetscFunctionBegin; 3016 /* Reset previously computed graph */ 3017 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3018 /* Init local Graph struct */ 3019 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3020 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3021 3022 /* Check validity of the csr graph passed in by the user */ 3023 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3024 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3025 } 3026 3027 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3028 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3029 PetscInt *xadj,*adjncy; 3030 PetscInt nvtxs; 3031 PetscBool flg_row=PETSC_FALSE; 3032 3033 if (pcbddc->use_local_adj) { 3034 3035 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3036 if (flg_row) { 3037 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3038 pcbddc->computed_rowadj = PETSC_TRUE; 3039 } 3040 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3041 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3042 IS is_dummy; 3043 ISLocalToGlobalMapping l2gmap_dummy; 3044 PetscInt j,sum; 3045 PetscInt *cxadj,*cadjncy; 3046 const PetscInt *idxs; 3047 PCBDDCGraph graph; 3048 PetscBT is_on_boundary; 3049 3050 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3051 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3052 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3053 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3054 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3055 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3056 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3057 if (flg_row) { 3058 graph->xadj = xadj; 3059 graph->adjncy = adjncy; 3060 } 3061 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3062 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3063 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3064 3065 if (pcbddc->dbg_flag) { 3066 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3067 for (i=0;i<graph->ncc;i++) { 3068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3069 } 3070 } 3071 3072 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3073 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3074 for (i=0;i<pcis->n_B;i++) { 3075 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3076 } 3077 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3078 3079 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3080 sum = 0; 3081 for (i=0;i<graph->ncc;i++) { 3082 PetscInt sizecc = 0; 3083 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3084 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3085 sizecc++; 3086 } 3087 } 3088 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3089 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3090 cxadj[graph->queue[j]] = sizecc; 3091 } 3092 } 3093 sum += sizecc*sizecc; 3094 } 3095 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3096 sum = 0; 3097 for (i=0;i<pcis->n;i++) { 3098 PetscInt temp = cxadj[i]; 3099 cxadj[i] = sum; 3100 sum += temp; 3101 } 3102 cxadj[pcis->n] = sum; 3103 for (i=0;i<graph->ncc;i++) { 3104 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3105 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3106 PetscInt k,sizecc = 0; 3107 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3108 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3109 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3110 sizecc++; 3111 } 3112 } 3113 } 3114 } 3115 } 3116 if (sum) { 3117 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3118 } else { 3119 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3120 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3121 } 3122 graph->xadj = 0; 3123 graph->adjncy = 0; 3124 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3125 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3126 } 3127 } 3128 if (pcbddc->dbg_flag) { 3129 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3130 } 3131 3132 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3133 vertex_size = 1; 3134 if (pcbddc->user_provided_isfordofs) { 3135 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3136 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3137 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3138 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3139 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3140 } 3141 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3142 pcbddc->n_ISForDofs = 0; 3143 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3144 } 3145 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3146 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3147 } else { 3148 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3149 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3150 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3151 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3152 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3153 } 3154 } 3155 } 3156 3157 /* Setup of Graph */ 3158 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3159 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3160 } 3161 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3162 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3163 } 3164 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3165 3166 /* Graph's connected components analysis */ 3167 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3168 3169 /* print some info to stdout */ 3170 if (pcbddc->dbg_flag) { 3171 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3172 } 3173 3174 /* mark topography has done */ 3175 pcbddc->recompute_topography = PETSC_FALSE; 3176 PetscFunctionReturn(0); 3177 } 3178 3179 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3180 #undef __FUNCT__ 3181 #define __FUNCT__ "PCBDDCSubsetNumbering" 3182 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3183 { 3184 PetscSF sf; 3185 PetscLayout map; 3186 const PetscInt *idxs; 3187 PetscInt *leaf_data,*root_data,*gidxs; 3188 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3189 PetscInt n_n,nlocals,start,first_index; 3190 PetscMPIInt commsize; 3191 PetscBool first_found; 3192 PetscErrorCode ierr; 3193 3194 PetscFunctionBegin; 3195 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3196 if (subset_mult) { 3197 PetscCheckSameComm(subset,1,subset_mult,2); 3198 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3199 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3200 } 3201 /* create workspace layout for computing global indices of subset */ 3202 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3203 lbounds[0] = lbounds[1] = 0; 3204 for (i=0;i<n;i++) { 3205 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3206 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3207 } 3208 lbounds[0] = -lbounds[0]; 3209 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3210 gbounds[0] = -gbounds[0]; 3211 N = gbounds[1] - gbounds[0] + 1; 3212 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3213 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3214 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3215 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3216 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3217 3218 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3219 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3220 if (subset_mult) { 3221 const PetscInt* idxs_mult; 3222 3223 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3224 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3225 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3226 } else { 3227 for (i=0;i<n;i++) leaf_data[i] = 1; 3228 } 3229 /* local size of new subset */ 3230 n_n = 0; 3231 for (i=0;i<n;i++) n_n += leaf_data[i]; 3232 3233 /* global indexes in layout */ 3234 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3235 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3236 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3237 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3238 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3239 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3240 3241 /* reduce from leaves to roots */ 3242 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3243 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3244 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPIU_MAX);CHKERRQ(ierr); 3245 3246 /* count indexes in local part of layout */ 3247 nlocals = 0; 3248 first_index = -1; 3249 first_found = PETSC_FALSE; 3250 for (i=0;i<Nl;i++) { 3251 if (!first_found && root_data[i]) { 3252 first_found = PETSC_TRUE; 3253 first_index = i; 3254 } 3255 nlocals += root_data[i]; 3256 } 3257 3258 /* cumulative of number of indexes and size of subset without holes */ 3259 start = 0; 3260 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3261 if (N_n) { /* compute total size of new subset if requested */ 3262 *N_n = start + nlocals; 3263 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3264 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3265 } 3266 /* adapt roof data with cumulative */ 3267 if (first_found) { 3268 PetscInt old_index; 3269 3270 root_data[first_index] += start; 3271 old_index = first_index; 3272 for (i=first_index+1;i<Nl;i++) { 3273 if (root_data[i]) { 3274 root_data[i] += root_data[old_index]; 3275 old_index = i; 3276 } 3277 } 3278 } 3279 3280 /* from roots to leaves */ 3281 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3282 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3283 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3284 3285 /* create new IS with global indexes without holes */ 3286 if (subset_mult) { 3287 const PetscInt* idxs_mult; 3288 PetscInt cum; 3289 3290 cum = 0; 3291 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3292 for (i=0;i<n;i++) { 3293 PetscInt j; 3294 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3295 } 3296 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3297 } else { 3298 for (i=0;i<n;i++) { 3299 gidxs[i] = leaf_data[i]-1; 3300 } 3301 } 3302 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3303 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3304 PetscFunctionReturn(0); 3305 } 3306 3307 #undef __FUNCT__ 3308 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3309 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3310 { 3311 PetscInt i,j; 3312 PetscScalar *alphas; 3313 PetscErrorCode ierr; 3314 3315 PetscFunctionBegin; 3316 /* this implements stabilized Gram-Schmidt */ 3317 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3318 for (i=0;i<n;i++) { 3319 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3320 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3321 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3322 } 3323 ierr = PetscFree(alphas);CHKERRQ(ierr); 3324 PetscFunctionReturn(0); 3325 } 3326 3327 #undef __FUNCT__ 3328 #define __FUNCT__ "MatISGetSubassemblingPattern" 3329 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3330 { 3331 Mat subdomain_adj; 3332 IS new_ranks,ranks_send_to; 3333 MatPartitioning partitioner; 3334 Mat_IS *matis; 3335 PetscInt n_neighs,*neighs,*n_shared,**shared; 3336 PetscInt prank; 3337 PetscMPIInt size,rank,color; 3338 PetscInt *xadj,*adjncy,*oldranks; 3339 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3340 PetscInt i,local_size,threshold=0; 3341 PetscErrorCode ierr; 3342 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3343 PetscSubcomm subcomm; 3344 3345 PetscFunctionBegin; 3346 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3347 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3348 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3349 3350 /* Get info on mapping */ 3351 matis = (Mat_IS*)(mat->data); 3352 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3353 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3354 3355 /* build local CSR graph of subdomains' connectivity */ 3356 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3357 xadj[0] = 0; 3358 xadj[1] = PetscMax(n_neighs-1,0); 3359 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3360 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3361 3362 if (threshold) { 3363 PetscInt xadj_count = 0; 3364 for (i=1;i<n_neighs;i++) { 3365 if (n_shared[i] > threshold) { 3366 adjncy[xadj_count] = neighs[i]; 3367 adjncy_wgt[xadj_count] = n_shared[i]; 3368 xadj_count++; 3369 } 3370 } 3371 xadj[1] = xadj_count; 3372 } else { 3373 if (xadj[1]) { 3374 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3375 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3376 } 3377 } 3378 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3379 if (use_square) { 3380 for (i=0;i<xadj[1];i++) { 3381 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3382 } 3383 } 3384 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3385 3386 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3387 3388 /* 3389 Restrict work on active processes only. 3390 */ 3391 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3392 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3393 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3394 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3395 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3396 if (color) { 3397 ierr = PetscFree(xadj);CHKERRQ(ierr); 3398 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3399 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3400 } else { 3401 PetscInt rstart=0,rend=0; 3402 PetscBool aggregate; 3403 3404 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3405 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3406 prank = rank; 3407 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3408 /* 3409 for (i=0;i<size;i++) { 3410 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3411 } 3412 */ 3413 for (i=0;i<xadj[1];i++) { 3414 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3415 } 3416 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3417 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3418 if (aggregate) { 3419 PetscInt lrows,row,ncols,*cols; 3420 PetscMPIInt nrank; 3421 PetscScalar *vals; 3422 3423 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3424 lrows = 0; 3425 if (nrank<redprocs) { 3426 lrows = size/redprocs; 3427 if (nrank<size%redprocs) lrows++; 3428 } 3429 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,100,NULL,100,NULL,&subdomain_adj);CHKERRQ(ierr); 3430 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3431 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3432 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3433 row = nrank; 3434 ncols = xadj[1]-xadj[0]; 3435 cols = adjncy; 3436 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3437 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3438 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3439 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3440 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3441 ierr = PetscFree(vals);CHKERRQ(ierr); 3442 } else { 3443 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3444 } 3445 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3446 3447 /* Partition */ 3448 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3449 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3450 if (use_vwgt) { 3451 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3452 v_wgt[0] = local_size; 3453 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3454 } 3455 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3456 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3457 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3458 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3459 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3460 3461 /* TODO: renumber new_ranks to avoid "holes" in recv processors */ 3462 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3463 if (!redprocs) { 3464 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3465 } else { 3466 PetscInt idxs[1]; 3467 PetscMPIInt tag; 3468 MPI_Request *reqs; 3469 3470 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3471 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3472 for (i=rstart;i<rend;i++) { 3473 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3474 } 3475 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3476 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3477 ierr = PetscFree(reqs);CHKERRQ(ierr); 3478 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3479 } 3480 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3481 /* clean up */ 3482 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3483 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3484 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3485 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3486 } 3487 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3488 3489 /* assemble parallel IS for sends */ 3490 i = 1; 3491 if (color) i=0; 3492 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3493 /* get back IS */ 3494 *is_sends = ranks_send_to; 3495 PetscFunctionReturn(0); 3496 } 3497 3498 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3499 3500 #undef __FUNCT__ 3501 #define __FUNCT__ "MatISSubassemble" 3502 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3503 { 3504 Mat local_mat; 3505 Mat_IS *matis; 3506 IS is_sends_internal; 3507 PetscInt rows,cols,new_local_rows; 3508 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3509 PetscBool ismatis,isdense,newisdense,destroy_mat; 3510 ISLocalToGlobalMapping l2gmap; 3511 PetscInt* l2gmap_indices; 3512 const PetscInt* is_indices; 3513 MatType new_local_type; 3514 /* buffers */ 3515 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3516 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3517 PetscInt *recv_buffer_idxs_local; 3518 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3519 /* MPI */ 3520 MPI_Comm comm,comm_n; 3521 PetscSubcomm subcomm; 3522 PetscMPIInt n_sends,n_recvs,commsize; 3523 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3524 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3525 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3526 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3527 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3528 PetscErrorCode ierr; 3529 3530 PetscFunctionBegin; 3531 /* TODO: add missing checks */ 3532 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3533 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3534 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3535 PetscValidLogicalCollectiveInt(mat,nis,7); 3536 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3537 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3538 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3539 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3540 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3541 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3542 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3543 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3544 PetscInt mrows,mcols,mnrows,mncols; 3545 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3546 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3547 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3548 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3549 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3550 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3551 } 3552 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3553 PetscValidLogicalCollectiveInt(mat,bs,0); 3554 /* prepare IS for sending if not provided */ 3555 if (!is_sends) { 3556 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3557 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3558 } else { 3559 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3560 is_sends_internal = is_sends; 3561 } 3562 3563 /* get pointer of MATIS data */ 3564 matis = (Mat_IS*)mat->data; 3565 3566 /* get comm */ 3567 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3568 3569 /* compute number of sends */ 3570 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3571 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3572 3573 /* compute number of receives */ 3574 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3575 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3576 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3577 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3578 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3579 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3580 ierr = PetscFree(iflags);CHKERRQ(ierr); 3581 3582 /* restrict comm if requested */ 3583 subcomm = 0; 3584 destroy_mat = PETSC_FALSE; 3585 if (restrict_comm) { 3586 PetscMPIInt color,subcommsize; 3587 3588 color = 0; 3589 if (restrict_full) { 3590 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3591 } else { 3592 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3593 } 3594 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3595 subcommsize = commsize - subcommsize; 3596 /* check if reuse has been requested */ 3597 if (reuse == MAT_REUSE_MATRIX) { 3598 if (*mat_n) { 3599 PetscMPIInt subcommsize2; 3600 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3601 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3602 comm_n = PetscObjectComm((PetscObject)*mat_n); 3603 } else { 3604 comm_n = PETSC_COMM_SELF; 3605 } 3606 } else { /* MAT_INITIAL_MATRIX */ 3607 PetscMPIInt rank; 3608 3609 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3610 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3611 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3612 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3613 comm_n = PetscSubcommChild(subcomm); 3614 } 3615 /* flag to destroy *mat_n if not significative */ 3616 if (color) destroy_mat = PETSC_TRUE; 3617 } else { 3618 comm_n = comm; 3619 } 3620 3621 /* prepare send/receive buffers */ 3622 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3623 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3624 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3625 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3626 if (nis) { 3627 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3628 } 3629 3630 /* Get data from local matrices */ 3631 if (!isdense) { 3632 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3633 /* TODO: See below some guidelines on how to prepare the local buffers */ 3634 /* 3635 send_buffer_vals should contain the raw values of the local matrix 3636 send_buffer_idxs should contain: 3637 - MatType_PRIVATE type 3638 - PetscInt size_of_l2gmap 3639 - PetscInt global_row_indices[size_of_l2gmap] 3640 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3641 */ 3642 } else { 3643 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3644 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3645 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3646 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3647 send_buffer_idxs[1] = i; 3648 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3649 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3650 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3651 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3652 for (i=0;i<n_sends;i++) { 3653 ilengths_vals[is_indices[i]] = len*len; 3654 ilengths_idxs[is_indices[i]] = len+2; 3655 } 3656 } 3657 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3658 /* additional is (if any) */ 3659 if (nis) { 3660 PetscMPIInt psum; 3661 PetscInt j; 3662 for (j=0,psum=0;j<nis;j++) { 3663 PetscInt plen; 3664 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3665 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3666 psum += len+1; /* indices + lenght */ 3667 } 3668 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3669 for (j=0,psum=0;j<nis;j++) { 3670 PetscInt plen; 3671 const PetscInt *is_array_idxs; 3672 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3673 send_buffer_idxs_is[psum] = plen; 3674 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3675 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3676 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3677 psum += plen+1; /* indices + lenght */ 3678 } 3679 for (i=0;i<n_sends;i++) { 3680 ilengths_idxs_is[is_indices[i]] = psum; 3681 } 3682 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3683 } 3684 3685 buf_size_idxs = 0; 3686 buf_size_vals = 0; 3687 buf_size_idxs_is = 0; 3688 for (i=0;i<n_recvs;i++) { 3689 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3690 buf_size_vals += (PetscInt)olengths_vals[i]; 3691 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3692 } 3693 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3694 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3695 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3696 3697 /* get new tags for clean communications */ 3698 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3699 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3700 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3701 3702 /* allocate for requests */ 3703 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3704 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3705 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3706 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3707 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3708 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3709 3710 /* communications */ 3711 ptr_idxs = recv_buffer_idxs; 3712 ptr_vals = recv_buffer_vals; 3713 ptr_idxs_is = recv_buffer_idxs_is; 3714 for (i=0;i<n_recvs;i++) { 3715 source_dest = onodes[i]; 3716 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3717 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3718 ptr_idxs += olengths_idxs[i]; 3719 ptr_vals += olengths_vals[i]; 3720 if (nis) { 3721 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3722 ptr_idxs_is += olengths_idxs_is[i]; 3723 } 3724 } 3725 for (i=0;i<n_sends;i++) { 3726 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3727 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3728 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3729 if (nis) { 3730 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3731 } 3732 } 3733 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3734 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3735 3736 /* assemble new l2g map */ 3737 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3738 ptr_idxs = recv_buffer_idxs; 3739 new_local_rows = 0; 3740 for (i=0;i<n_recvs;i++) { 3741 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3742 ptr_idxs += olengths_idxs[i]; 3743 } 3744 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3745 ptr_idxs = recv_buffer_idxs; 3746 new_local_rows = 0; 3747 for (i=0;i<n_recvs;i++) { 3748 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3749 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3750 ptr_idxs += olengths_idxs[i]; 3751 } 3752 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3753 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3754 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3755 3756 /* infer new local matrix type from received local matrices type */ 3757 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3758 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3759 if (n_recvs) { 3760 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3761 ptr_idxs = recv_buffer_idxs; 3762 for (i=0;i<n_recvs;i++) { 3763 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3764 new_local_type_private = MATAIJ_PRIVATE; 3765 break; 3766 } 3767 ptr_idxs += olengths_idxs[i]; 3768 } 3769 switch (new_local_type_private) { 3770 case MATDENSE_PRIVATE: 3771 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3772 new_local_type = MATSEQAIJ; 3773 bs = 1; 3774 } else { /* if I receive only 1 dense matrix */ 3775 new_local_type = MATSEQDENSE; 3776 bs = 1; 3777 } 3778 break; 3779 case MATAIJ_PRIVATE: 3780 new_local_type = MATSEQAIJ; 3781 bs = 1; 3782 break; 3783 case MATBAIJ_PRIVATE: 3784 new_local_type = MATSEQBAIJ; 3785 break; 3786 case MATSBAIJ_PRIVATE: 3787 new_local_type = MATSEQSBAIJ; 3788 break; 3789 default: 3790 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3791 break; 3792 } 3793 } else { /* by default, new_local_type is seqdense */ 3794 new_local_type = MATSEQDENSE; 3795 bs = 1; 3796 } 3797 3798 /* create MATIS object if needed */ 3799 if (reuse == MAT_INITIAL_MATRIX) { 3800 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3801 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3802 } else { 3803 /* it also destroys the local matrices */ 3804 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3805 } 3806 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3807 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3808 3809 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3810 3811 /* Global to local map of received indices */ 3812 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3813 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3814 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3815 3816 /* restore attributes -> type of incoming data and its size */ 3817 buf_size_idxs = 0; 3818 for (i=0;i<n_recvs;i++) { 3819 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3820 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3821 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3822 } 3823 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3824 3825 /* set preallocation */ 3826 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3827 if (!newisdense) { 3828 PetscInt *new_local_nnz=0; 3829 3830 ptr_vals = recv_buffer_vals; 3831 ptr_idxs = recv_buffer_idxs_local; 3832 if (n_recvs) { 3833 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3834 } 3835 for (i=0;i<n_recvs;i++) { 3836 PetscInt j; 3837 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3838 for (j=0;j<*(ptr_idxs+1);j++) { 3839 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3840 } 3841 } else { 3842 /* TODO */ 3843 } 3844 ptr_idxs += olengths_idxs[i]; 3845 } 3846 if (new_local_nnz) { 3847 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3848 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3849 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3850 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3851 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3852 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3853 } else { 3854 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3855 } 3856 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3857 } else { 3858 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3859 } 3860 3861 /* set values */ 3862 ptr_vals = recv_buffer_vals; 3863 ptr_idxs = recv_buffer_idxs_local; 3864 for (i=0;i<n_recvs;i++) { 3865 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3866 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3867 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3868 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3869 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3870 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3871 } else { 3872 /* TODO */ 3873 } 3874 ptr_idxs += olengths_idxs[i]; 3875 ptr_vals += olengths_vals[i]; 3876 } 3877 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3878 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3879 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3880 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3881 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3882 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3883 3884 #if 0 3885 if (!restrict_comm) { /* check */ 3886 Vec lvec,rvec; 3887 PetscReal infty_error; 3888 3889 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3890 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3891 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3892 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3893 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3894 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3895 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3896 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3897 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3898 } 3899 #endif 3900 3901 /* assemble new additional is (if any) */ 3902 if (nis) { 3903 PetscInt **temp_idxs,*count_is,j,psum; 3904 3905 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3906 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3907 ptr_idxs = recv_buffer_idxs_is; 3908 psum = 0; 3909 for (i=0;i<n_recvs;i++) { 3910 for (j=0;j<nis;j++) { 3911 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3912 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3913 psum += plen; 3914 ptr_idxs += plen+1; /* shift pointer to received data */ 3915 } 3916 } 3917 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3918 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3919 for (i=1;i<nis;i++) { 3920 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3921 } 3922 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3923 ptr_idxs = recv_buffer_idxs_is; 3924 for (i=0;i<n_recvs;i++) { 3925 for (j=0;j<nis;j++) { 3926 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3927 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3928 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3929 ptr_idxs += plen+1; /* shift pointer to received data */ 3930 } 3931 } 3932 for (i=0;i<nis;i++) { 3933 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3934 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3935 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3936 } 3937 ierr = PetscFree(count_is);CHKERRQ(ierr); 3938 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3939 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3940 } 3941 /* free workspace */ 3942 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3943 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3944 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3945 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3946 if (isdense) { 3947 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3948 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3949 } else { 3950 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3951 } 3952 if (nis) { 3953 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3954 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3955 } 3956 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3957 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3958 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3959 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3960 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3961 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3962 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3963 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3964 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3965 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3966 ierr = PetscFree(onodes);CHKERRQ(ierr); 3967 if (nis) { 3968 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3969 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3970 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3971 } 3972 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3973 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3974 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3975 for (i=0;i<nis;i++) { 3976 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3977 } 3978 *mat_n = NULL; 3979 } 3980 PetscFunctionReturn(0); 3981 } 3982 3983 /* temporary hack into ksp private data structure */ 3984 #include <petsc/private/kspimpl.h> 3985 3986 #undef __FUNCT__ 3987 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3988 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3989 { 3990 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3991 PC_IS *pcis = (PC_IS*)pc->data; 3992 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3993 MatNullSpace CoarseNullSpace=NULL; 3994 ISLocalToGlobalMapping coarse_islg; 3995 IS coarse_is,*isarray; 3996 PetscInt i,im_active=-1,active_procs=-1; 3997 PetscInt nis,nisdofs,nisneu; 3998 PC pc_temp; 3999 PCType coarse_pc_type; 4000 KSPType coarse_ksp_type; 4001 PetscBool multilevel_requested,multilevel_allowed; 4002 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4003 Mat t_coarse_mat_is; 4004 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4005 PetscMPIInt all_procs; 4006 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4007 PetscBool compute_vecs = PETSC_FALSE; 4008 PetscScalar *array; 4009 PetscErrorCode ierr; 4010 4011 PetscFunctionBegin; 4012 /* Assign global numbering to coarse dofs */ 4013 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4014 PetscInt ocoarse_size; 4015 compute_vecs = PETSC_TRUE; 4016 ocoarse_size = pcbddc->coarse_size; 4017 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4018 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4019 /* see if we can avoid some work */ 4020 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4021 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 4022 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4023 coarse_reuse = PETSC_FALSE; 4024 } else { /* we can safely reuse already computed coarse matrix */ 4025 coarse_reuse = PETSC_TRUE; 4026 } 4027 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4028 coarse_reuse = PETSC_FALSE; 4029 } 4030 /* reset any subassembling information */ 4031 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4032 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4033 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4034 coarse_reuse = PETSC_TRUE; 4035 } 4036 4037 /* count "active" (i.e. with positive local size) and "void" processes */ 4038 im_active = !!(pcis->n); 4039 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4040 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4041 void_procs = all_procs-active_procs; 4042 csin_type_simple = PETSC_TRUE; 4043 redist = PETSC_FALSE; 4044 if (pcbddc->current_level && void_procs) { 4045 csin_ml = PETSC_TRUE; 4046 ncoarse_ml = void_procs; 4047 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4048 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4049 csin_ds = PETSC_TRUE; 4050 ncoarse_ds = pcbddc->redistribute_coarse; 4051 redist = PETSC_TRUE; 4052 } else { 4053 csin_ds = PETSC_TRUE; 4054 ncoarse_ds = active_procs; 4055 redist = PETSC_TRUE; 4056 } 4057 } else { 4058 csin_ml = PETSC_FALSE; 4059 ncoarse_ml = all_procs; 4060 if (void_procs) { 4061 csin_ds = PETSC_TRUE; 4062 ncoarse_ds = void_procs; 4063 csin_type_simple = PETSC_FALSE; 4064 } else { 4065 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4066 csin_ds = PETSC_TRUE; 4067 ncoarse_ds = pcbddc->redistribute_coarse; 4068 redist = PETSC_TRUE; 4069 } else { 4070 csin_ds = PETSC_FALSE; 4071 ncoarse_ds = all_procs; 4072 } 4073 } 4074 } 4075 4076 /* 4077 test if we can go multilevel: three conditions must be satisfied: 4078 - we have not exceeded the number of levels requested 4079 - we can actually subassemble the active processes 4080 - we can find a suitable number of MPI processes where we can place the subassembled problem 4081 */ 4082 multilevel_allowed = PETSC_FALSE; 4083 multilevel_requested = PETSC_FALSE; 4084 if (pcbddc->current_level < pcbddc->max_levels) { 4085 multilevel_requested = PETSC_TRUE; 4086 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4087 multilevel_allowed = PETSC_FALSE; 4088 } else { 4089 multilevel_allowed = PETSC_TRUE; 4090 } 4091 } 4092 /* determine number of process partecipating to coarse solver */ 4093 if (multilevel_allowed) { 4094 ncoarse = ncoarse_ml; 4095 csin = csin_ml; 4096 redist = PETSC_FALSE; 4097 } else { 4098 ncoarse = ncoarse_ds; 4099 csin = csin_ds; 4100 } 4101 4102 /* creates temporary l2gmap and IS for coarse indexes */ 4103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4104 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4105 4106 /* creates temporary MATIS object for coarse matrix */ 4107 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4108 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4109 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4110 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4111 #if 0 4112 { 4113 PetscViewer viewer; 4114 char filename[256]; 4115 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4116 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4117 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4118 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4119 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4120 } 4121 #endif 4122 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4123 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4124 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4125 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4126 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4127 4128 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4129 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4130 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4131 const PetscInt *idxs; 4132 ISLocalToGlobalMapping tmap; 4133 4134 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4135 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4136 /* allocate space for temporary storage */ 4137 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4138 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4139 /* allocate for IS array */ 4140 nisdofs = pcbddc->n_ISForDofsLocal; 4141 nisneu = !!pcbddc->NeumannBoundariesLocal; 4142 nis = nisdofs + nisneu; 4143 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4144 /* dofs splitting */ 4145 for (i=0;i<nisdofs;i++) { 4146 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4147 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4148 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4149 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4150 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4151 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4152 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4153 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4154 } 4155 /* neumann boundaries */ 4156 if (pcbddc->NeumannBoundariesLocal) { 4157 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4158 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4159 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4160 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4161 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4162 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4163 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4164 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4165 } 4166 /* free memory */ 4167 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4168 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4169 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4170 } else { 4171 nis = 0; 4172 nisdofs = 0; 4173 nisneu = 0; 4174 isarray = NULL; 4175 } 4176 /* destroy no longer needed map */ 4177 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4178 4179 /* restrict on coarse candidates (if needed) */ 4180 coarse_mat_is = NULL; 4181 if (csin) { 4182 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4183 if (redist) { 4184 PetscMPIInt rank; 4185 PetscInt spc,n_spc_p1,dest[1],destsize; 4186 4187 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4188 spc = active_procs/ncoarse; 4189 n_spc_p1 = active_procs%ncoarse; 4190 if (im_active) { 4191 destsize = 1; 4192 if (rank > n_spc_p1*(spc+1)-1) { 4193 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4194 } else { 4195 dest[0] = rank/(spc+1); 4196 } 4197 } else { 4198 destsize = 0; 4199 } 4200 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4201 } else if (csin_type_simple) { 4202 PetscMPIInt rank; 4203 PetscInt issize,isidx; 4204 4205 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4206 if (im_active) { 4207 issize = 1; 4208 isidx = (PetscInt)rank; 4209 } else { 4210 issize = 0; 4211 isidx = -1; 4212 } 4213 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4214 } else { /* get a suitable subassembling pattern from MATIS code */ 4215 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4216 } 4217 4218 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4219 if (!redist || ncoarse <= void_procs) { 4220 PetscInt ncoarse_cand,tissize,*nisindices; 4221 PetscInt *coarse_candidates; 4222 const PetscInt* tisindices; 4223 4224 /* get coarse candidates' ranks in pc communicator */ 4225 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4226 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4227 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4228 if (!coarse_candidates[i]) { 4229 coarse_candidates[ncoarse_cand++]=i; 4230 } 4231 } 4232 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4233 4234 4235 if (pcbddc->dbg_flag) { 4236 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4237 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4238 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4239 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4240 for (i=0;i<ncoarse_cand;i++) { 4241 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4242 } 4243 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4244 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4245 } 4246 /* shift the pattern on coarse candidates */ 4247 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4248 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4249 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4250 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4251 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4252 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4253 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4254 } 4255 if (pcbddc->dbg_flag) { 4256 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4257 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4258 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4259 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4260 } 4261 } 4262 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4263 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4264 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4265 } else { /* this is the last level, so use just receiving processes in subcomm */ 4266 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4267 } 4268 } else { 4269 if (pcbddc->dbg_flag) { 4270 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4271 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4272 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4273 } 4274 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4275 coarse_mat_is = t_coarse_mat_is; 4276 } 4277 4278 /* create local to global scatters for coarse problem */ 4279 if (compute_vecs) { 4280 PetscInt lrows; 4281 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4282 if (coarse_mat_is) { 4283 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4284 } else { 4285 lrows = 0; 4286 } 4287 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4288 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4289 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4290 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4291 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4292 } 4293 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4294 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4295 4296 /* set defaults for coarse KSP and PC */ 4297 if (multilevel_allowed) { 4298 coarse_ksp_type = KSPRICHARDSON; 4299 coarse_pc_type = PCBDDC; 4300 } else { 4301 coarse_ksp_type = KSPPREONLY; 4302 coarse_pc_type = PCREDUNDANT; 4303 } 4304 4305 /* print some info if requested */ 4306 if (pcbddc->dbg_flag) { 4307 if (!multilevel_allowed) { 4308 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4309 if (multilevel_requested) { 4310 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4311 } else if (pcbddc->max_levels) { 4312 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4313 } 4314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4315 } 4316 } 4317 4318 /* create the coarse KSP object only once with defaults */ 4319 if (coarse_mat_is) { 4320 MatReuse coarse_mat_reuse; 4321 PetscViewer dbg_viewer = NULL; 4322 if (pcbddc->dbg_flag) { 4323 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4324 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4325 } 4326 if (!pcbddc->coarse_ksp) { 4327 char prefix[256],str_level[16]; 4328 size_t len; 4329 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4330 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4331 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4332 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4333 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4334 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4335 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4336 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4337 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4338 /* prefix */ 4339 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4340 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4341 if (!pcbddc->current_level) { 4342 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4343 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4344 } else { 4345 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4346 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4347 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4348 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4349 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4350 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4351 } 4352 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4353 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4354 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4355 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4356 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4357 /* allow user customization */ 4358 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4359 } 4360 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4361 if (nisdofs) { 4362 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4363 for (i=0;i<nisdofs;i++) { 4364 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4365 } 4366 } 4367 if (nisneu) { 4368 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4369 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4370 } 4371 4372 /* get some info after set from options */ 4373 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4374 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4375 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4376 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4377 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4378 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4379 isbddc = PETSC_FALSE; 4380 } 4381 if (isredundant) { 4382 KSP inner_ksp; 4383 PC inner_pc; 4384 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4385 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4386 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4387 } 4388 4389 /* assemble coarse matrix */ 4390 if (coarse_reuse) { 4391 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4392 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4393 coarse_mat_reuse = MAT_REUSE_MATRIX; 4394 } else { 4395 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4396 } 4397 if (isbddc || isnn) { 4398 if (pcbddc->coarsening_ratio > 1) { 4399 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4400 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4401 if (pcbddc->dbg_flag) { 4402 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4403 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4404 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4405 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4406 } 4407 } 4408 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4409 } else { 4410 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4411 coarse_mat = coarse_mat_is; 4412 } 4413 } else { 4414 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4415 } 4416 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4417 4418 /* propagate symmetry info of coarse matrix */ 4419 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4420 if (pc->pmat->symmetric_set) { 4421 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4422 } 4423 if (pc->pmat->hermitian_set) { 4424 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4425 } 4426 if (pc->pmat->spd_set) { 4427 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4428 } 4429 /* set operators */ 4430 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4431 if (pcbddc->dbg_flag) { 4432 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4433 } 4434 } else { /* processes non partecipating to coarse solver (if any) */ 4435 coarse_mat = 0; 4436 } 4437 ierr = PetscFree(isarray);CHKERRQ(ierr); 4438 #if 0 4439 { 4440 PetscViewer viewer; 4441 char filename[256]; 4442 sprintf(filename,"coarse_mat.m"); 4443 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4444 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4445 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4446 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4447 } 4448 #endif 4449 4450 /* Compute coarse null space (special handling by BDDC only) */ 4451 if (pcbddc->NullSpace) { 4452 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4453 } 4454 4455 if (pcbddc->coarse_ksp) { 4456 Vec crhs,csol; 4457 PetscBool ispreonly; 4458 4459 if (CoarseNullSpace) { 4460 if (isbddc) { 4461 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4462 } else { 4463 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4464 } 4465 } 4466 /* setup coarse ksp */ 4467 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4468 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4469 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4470 /* hack */ 4471 if (!csol) { 4472 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4473 } 4474 if (!crhs) { 4475 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4476 } 4477 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4478 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4479 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4480 KSP check_ksp; 4481 KSPType check_ksp_type; 4482 PC check_pc; 4483 Vec check_vec,coarse_vec; 4484 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4485 PetscInt its; 4486 PetscBool compute_eigs; 4487 PetscReal *eigs_r,*eigs_c; 4488 PetscInt neigs; 4489 const char *prefix; 4490 4491 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4492 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4493 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4494 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4495 if (ispreonly) { 4496 check_ksp_type = KSPPREONLY; 4497 compute_eigs = PETSC_FALSE; 4498 } else { 4499 check_ksp_type = KSPGMRES; 4500 compute_eigs = PETSC_TRUE; 4501 } 4502 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4503 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4504 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4505 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4506 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4507 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4508 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4509 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4510 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4511 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4512 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4513 /* create random vec */ 4514 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4515 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4516 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4517 if (CoarseNullSpace) { 4518 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4519 } 4520 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4521 /* solve coarse problem */ 4522 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4523 if (CoarseNullSpace) { 4524 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4525 } 4526 /* set eigenvalue estimation if preonly has not been requested */ 4527 if (compute_eigs) { 4528 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4529 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4530 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4531 lambda_max = eigs_r[neigs-1]; 4532 lambda_min = eigs_r[0]; 4533 if (pcbddc->use_coarse_estimates) { 4534 if (lambda_max>lambda_min) { 4535 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4536 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4537 } 4538 } 4539 } 4540 4541 /* check coarse problem residual error */ 4542 if (pcbddc->dbg_flag) { 4543 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4544 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4545 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4546 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4547 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4548 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4549 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4550 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4551 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4552 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4553 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4554 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4555 if (compute_eigs) { 4556 PetscReal lambda_max_s,lambda_min_s; 4557 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4558 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4559 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4560 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4561 for (i=0;i<neigs;i++) { 4562 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4563 } 4564 } 4565 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4566 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4567 } 4568 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4569 if (compute_eigs) { 4570 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4571 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4572 } 4573 } 4574 } 4575 /* print additional info */ 4576 if (pcbddc->dbg_flag) { 4577 /* waits until all processes reaches this point */ 4578 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4579 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4580 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4581 } 4582 4583 /* free memory */ 4584 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4585 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4586 PetscFunctionReturn(0); 4587 } 4588 4589 #undef __FUNCT__ 4590 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4591 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4592 { 4593 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4594 PC_IS* pcis = (PC_IS*)pc->data; 4595 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4596 IS subset,subset_mult,subset_n; 4597 PetscInt local_size,coarse_size=0; 4598 PetscInt *local_primal_indices=NULL; 4599 const PetscInt *t_local_primal_indices; 4600 PetscErrorCode ierr; 4601 4602 PetscFunctionBegin; 4603 /* Compute global number of coarse dofs */ 4604 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4605 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4606 } 4607 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4608 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,subset_n,&subset);CHKERRQ(ierr); 4609 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4610 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4611 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4612 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4613 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4614 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4615 if (local_size != pcbddc->local_primal_size) { 4616 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4617 } 4618 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4619 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4620 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4621 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4622 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4623 4624 /* check numbering */ 4625 if (pcbddc->dbg_flag) { 4626 PetscScalar coarsesum,*array; 4627 PetscInt i; 4628 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4629 4630 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4631 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4632 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4633 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4634 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4635 for (i=0;i<pcbddc->local_primal_size;i++) { 4636 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4637 } 4638 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4639 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4640 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4641 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4642 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4643 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4644 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4645 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4646 for (i=0;i<pcis->n;i++) { 4647 if (array[i] == 1.0) { 4648 set_error = PETSC_TRUE; 4649 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4650 } 4651 } 4652 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4653 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4654 for (i=0;i<pcis->n;i++) { 4655 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4656 } 4657 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4658 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4659 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4660 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4661 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4662 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4663 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4664 PetscInt *gidxs; 4665 4666 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4667 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4668 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4669 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4671 for (i=0;i<pcbddc->local_primal_size;i++) { 4672 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4673 } 4674 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4675 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4676 } 4677 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4678 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4679 } 4680 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4681 /* get back data */ 4682 *coarse_size_n = coarse_size; 4683 *local_primal_indices_n = local_primal_indices; 4684 PetscFunctionReturn(0); 4685 } 4686 4687 #undef __FUNCT__ 4688 #define __FUNCT__ "PCBDDCGlobalToLocal" 4689 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4690 { 4691 IS localis_t; 4692 PetscInt i,lsize,*idxs,n; 4693 PetscScalar *vals; 4694 PetscErrorCode ierr; 4695 4696 PetscFunctionBegin; 4697 /* get indices in local ordering exploiting local to global map */ 4698 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4699 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4700 for (i=0;i<lsize;i++) vals[i] = 1.0; 4701 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4702 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4703 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4704 if (idxs) { /* multilevel guard */ 4705 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4706 } 4707 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4708 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4709 ierr = PetscFree(vals);CHKERRQ(ierr); 4710 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4711 /* now compute set in local ordering */ 4712 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4713 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4714 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4715 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4716 for (i=0,lsize=0;i<n;i++) { 4717 if (PetscRealPart(vals[i]) > 0.5) { 4718 lsize++; 4719 } 4720 } 4721 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4722 for (i=0,lsize=0;i<n;i++) { 4723 if (PetscRealPart(vals[i]) > 0.5) { 4724 idxs[lsize++] = i; 4725 } 4726 } 4727 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4728 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4729 *localis = localis_t; 4730 PetscFunctionReturn(0); 4731 } 4732 4733 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4734 #undef __FUNCT__ 4735 #define __FUNCT__ "PCBDDCMatMult_Private" 4736 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4737 { 4738 PCBDDCChange_ctx change_ctx; 4739 PetscErrorCode ierr; 4740 4741 PetscFunctionBegin; 4742 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4743 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4744 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4745 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4746 PetscFunctionReturn(0); 4747 } 4748 4749 #undef __FUNCT__ 4750 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4751 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4752 { 4753 PCBDDCChange_ctx change_ctx; 4754 PetscErrorCode ierr; 4755 4756 PetscFunctionBegin; 4757 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4758 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4759 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4760 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4761 PetscFunctionReturn(0); 4762 } 4763 4764 #undef __FUNCT__ 4765 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4766 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4767 { 4768 PC_IS *pcis=(PC_IS*)pc->data; 4769 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4770 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4771 Mat S_j; 4772 PetscInt *used_xadj,*used_adjncy; 4773 PetscBool free_used_adj; 4774 PetscErrorCode ierr; 4775 4776 PetscFunctionBegin; 4777 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4778 free_used_adj = PETSC_FALSE; 4779 if (pcbddc->sub_schurs_layers == -1) { 4780 used_xadj = NULL; 4781 used_adjncy = NULL; 4782 } else { 4783 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4784 used_xadj = pcbddc->mat_graph->xadj; 4785 used_adjncy = pcbddc->mat_graph->adjncy; 4786 } else if (pcbddc->computed_rowadj) { 4787 used_xadj = pcbddc->mat_graph->xadj; 4788 used_adjncy = pcbddc->mat_graph->adjncy; 4789 } else { 4790 PetscBool flg_row=PETSC_FALSE; 4791 const PetscInt *xadj,*adjncy; 4792 PetscInt nvtxs; 4793 4794 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4795 if (flg_row) { 4796 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4797 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4798 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4799 free_used_adj = PETSC_TRUE; 4800 } else { 4801 pcbddc->sub_schurs_layers = -1; 4802 used_xadj = NULL; 4803 used_adjncy = NULL; 4804 } 4805 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4806 } 4807 } 4808 4809 /* setup sub_schurs data */ 4810 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4811 if (!sub_schurs->use_mumps) { 4812 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4813 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4814 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4815 } else { 4816 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4817 PetscBool isseqaij; 4818 if (!pcbddc->use_vertices && reuse_solvers) { 4819 PetscInt n_vertices; 4820 4821 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4822 reuse_solvers = (PetscBool)!n_vertices; 4823 } 4824 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4825 if (!isseqaij) { 4826 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4827 if (matis->A == pcbddc->local_mat) { 4828 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4829 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4830 } else { 4831 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4832 } 4833 } 4834 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4835 } 4836 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4837 4838 /* free adjacency */ 4839 if (free_used_adj) { 4840 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4841 } 4842 PetscFunctionReturn(0); 4843 } 4844 4845 #undef __FUNCT__ 4846 #define __FUNCT__ "PCBDDCInitSubSchurs" 4847 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4848 { 4849 PC_IS *pcis=(PC_IS*)pc->data; 4850 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4851 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4852 PCBDDCGraph graph; 4853 PetscErrorCode ierr; 4854 4855 PetscFunctionBegin; 4856 /* attach interface graph for determining subsets */ 4857 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4858 IS verticesIS,verticescomm; 4859 PetscInt vsize,*idxs; 4860 4861 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4862 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4863 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4864 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4865 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4866 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4867 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4868 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4869 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4870 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4871 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4872 /* 4873 if (pcbddc->dbg_flag) { 4874 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4875 } 4876 */ 4877 } else { 4878 graph = pcbddc->mat_graph; 4879 } 4880 4881 /* sub_schurs init */ 4882 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4883 4884 /* free graph struct */ 4885 if (pcbddc->sub_schurs_rebuild) { 4886 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4887 } 4888 PetscFunctionReturn(0); 4889 } 4890