1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 thresh = 1.0; 92 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 93 #if defined(PETSC_USE_COMPLEX) 94 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 95 #else 96 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 97 #endif 98 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 99 ierr = PetscFPTrapPop();CHKERRQ(ierr); 100 } else { 101 /* TODO */ 102 } 103 } else { 104 lwork = 0; 105 } 106 107 nv = 0; 108 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 109 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 110 } 111 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 112 if (allocated_S_St) { 113 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 114 } 115 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 116 #if defined(PETSC_USE_COMPLEX) 117 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 118 #endif 119 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 121 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 122 nv+cum,&pcbddc->adaptive_constraints_idxs, 123 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 124 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 125 126 maxneigs = 0; 127 cum = cum2 = cumarray = 0; 128 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 129 pcbddc->adaptive_constraints_data_ptr[0] = 0; 130 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 131 const PetscInt *idxs; 132 133 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 134 for (cum=0;cum<nv;cum++) { 135 pcbddc->adaptive_constraints_n[cum] = 1; 136 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 137 pcbddc->adaptive_constraints_data[cum] = 1.0; 138 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 139 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 140 } 141 cum2 = cum; 142 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 143 } 144 145 if (mss) { /* multilevel */ 146 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 147 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 148 } 149 150 for (i=0;i<sub_schurs->n_subs;i++) { 151 152 const PetscInt *idxs; 153 PetscReal infty = PETSC_MAX_REAL; 154 PetscInt j,subset_size,eigs_start = 0; 155 PetscBLASInt B_N; 156 PetscBool same_data = PETSC_FALSE; 157 158 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 159 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 160 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 161 if (sub_schurs->is_hermitian) { 162 PetscInt j,k; 163 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 164 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 for (j=0;j<subset_size;j++) { 168 for (k=j;k<subset_size;k++) { 169 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 170 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 171 } 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 183 /* see if we can save some work */ 184 if (sub_schurs->n_subs == 1) { 185 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 186 } 187 188 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 189 B_neigs = 0; 190 } else { 191 /* Threshold: this is an heuristic for edges */ 192 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 193 194 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 195 PetscBLASInt B_itype = 1; 196 PetscBLASInt B_IL, B_IU; 197 PetscReal eps = -1.0; /* dlamch? */ 198 PetscInt nmin_s; 199 200 /* ask for eigenvalues larger than thresh */ 201 if (pcbddc->dbg_flag) { 202 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 203 } 204 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 205 #if defined(PETSC_USE_COMPLEX) 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 207 #else 208 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 209 #endif 210 ierr = PetscFPTrapPop();CHKERRQ(ierr); 211 if (B_ierr) { 212 if (B_ierr < 0 ) { 213 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 214 } else if (B_ierr <= B_N) { 215 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 216 } else { 217 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 218 } 219 } 220 221 if (B_neigs > nmax) { 222 if (pcbddc->dbg_flag) { 223 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 224 } 225 eigs_start = B_neigs -nmax; 226 B_neigs = nmax; 227 } 228 229 nmin_s = PetscMin(nmin,B_N); 230 if (B_neigs < nmin_s) { 231 PetscBLASInt B_neigs2; 232 233 B_IU = B_N - B_neigs; 234 B_IL = B_N - nmin_s + 1; 235 if (pcbddc->dbg_flag) { 236 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 237 } 238 if (sub_schurs->is_hermitian) { 239 PetscInt j; 240 for (j=0;j<subset_size;j++) { 241 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 242 } 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 } else { 247 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 248 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 249 } 250 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 251 #if defined(PETSC_USE_COMPLEX) 252 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 253 #else 254 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 255 #endif 256 ierr = PetscFPTrapPop();CHKERRQ(ierr); 257 B_neigs += B_neigs2; 258 } 259 if (B_ierr) { 260 if (B_ierr < 0 ) { 261 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 262 } else if (B_ierr <= B_N) { 263 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 264 } else { 265 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 266 } 267 } 268 if (pcbddc->dbg_flag) { 269 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 270 for (j=0;j<B_neigs;j++) { 271 if (eigs[j] == 0.0) { 272 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 273 } else { 274 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 275 } 276 } 277 } 278 } else { 279 /* TODO */ 280 } 281 } 282 maxneigs = PetscMax(B_neigs,maxneigs); 283 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 284 if (B_neigs) { 285 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 286 287 if (pcbddc->dbg_flag > 1) { 288 PetscInt ii; 289 for (ii=0;ii<B_neigs;ii++) { 290 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 291 for (j=0;j<B_N;j++) { 292 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 293 } 294 } 295 } 296 #if 0 297 for (j=0;j<B_neigs;j++) { 298 PetscBLASInt Blas_N,Blas_one = 1.0; 299 PetscScalar norm; 300 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 301 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 302 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 303 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 304 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 305 } else { 306 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 307 } 308 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 309 } 310 #endif 311 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 312 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 313 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 314 cum++; 315 } 316 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 317 /* shift for next computation */ 318 cumarray += subset_size*subset_size; 319 } 320 if (pcbddc->dbg_flag) { 321 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 322 } 323 324 if (mss) { 325 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 326 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 327 /* destroy matrices (junk) */ 328 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 329 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 330 } 331 if (allocated_S_St) { 332 ierr = PetscFree2(S,St);CHKERRQ(ierr); 333 } 334 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 335 #if defined(PETSC_USE_COMPLEX) 336 ierr = PetscFree(rwork);CHKERRQ(ierr); 337 #endif 338 if (pcbddc->dbg_flag) { 339 PetscInt maxneigs_r; 340 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 341 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 342 } 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCSetUpSolvers" 348 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 349 { 350 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 351 PetscScalar *coarse_submat_vals; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 /* Setup local scatters R_to_B and (optionally) R_to_D */ 356 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 357 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 358 359 /* Setup local neumann solver ksp_R */ 360 /* PCBDDCSetUpLocalScatters should be called first! */ 361 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 362 363 /* Change global null space passed in by the user if change of basis has been requested */ 364 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 365 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 366 } 367 368 /* 369 Setup local correction and local part of coarse basis. 370 Gives back the dense local part of the coarse matrix in column major ordering 371 */ 372 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 373 374 /* Compute total number of coarse nodes and setup coarse solver */ 375 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 376 377 /* free */ 378 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "PCBDDCResetCustomization" 384 PetscErrorCode PCBDDCResetCustomization(PC pc) 385 { 386 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 387 PetscErrorCode ierr; 388 389 PetscFunctionBegin; 390 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 392 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 393 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 394 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 395 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 396 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 397 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 398 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 399 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 400 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "PCBDDCResetTopography" 406 PetscErrorCode PCBDDCResetTopography(PC pc) 407 { 408 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 409 PetscErrorCode ierr; 410 411 PetscFunctionBegin; 412 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 413 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 414 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 415 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 416 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "PCBDDCResetSolvers" 422 PetscErrorCode PCBDDCResetSolvers(PC pc) 423 { 424 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 425 PetscScalar *array; 426 PetscErrorCode ierr; 427 428 PetscFunctionBegin; 429 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 430 if (pcbddc->coarse_phi_B) { 431 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 432 ierr = PetscFree(array);CHKERRQ(ierr); 433 } 434 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 435 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 436 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 437 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 438 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 440 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 442 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 443 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 444 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 445 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 446 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 447 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 448 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 449 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 450 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 451 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 452 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 453 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 454 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 455 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 456 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 462 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 463 { 464 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 465 PC_IS *pcis = (PC_IS*)pc->data; 466 VecType impVecType; 467 PetscInt n_constraints,n_R,old_size; 468 PetscErrorCode ierr; 469 470 PetscFunctionBegin; 471 if (!pcbddc->ConstraintMatrix) { 472 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 473 } 474 /* get sizes */ 475 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 476 n_R = pcis->n-pcbddc->n_vertices; 477 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 478 /* local work vectors (try to avoid unneeded work)*/ 479 /* R nodes */ 480 old_size = -1; 481 if (pcbddc->vec1_R) { 482 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 483 } 484 if (n_R != old_size) { 485 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 486 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 487 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 488 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 489 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 490 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 491 } 492 /* local primal dofs */ 493 old_size = -1; 494 if (pcbddc->vec1_P) { 495 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 496 } 497 if (pcbddc->local_primal_size != old_size) { 498 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 499 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 500 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 501 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 502 } 503 /* local explicit constraints */ 504 old_size = -1; 505 if (pcbddc->vec1_C) { 506 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 507 } 508 if (n_constraints && n_constraints != old_size) { 509 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 510 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 511 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 512 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 513 } 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "PCBDDCSetUpCorrection" 519 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 520 { 521 PetscErrorCode ierr; 522 /* pointers to pcis and pcbddc */ 523 PC_IS* pcis = (PC_IS*)pc->data; 524 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 525 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 526 /* submatrices of local problem */ 527 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 528 /* submatrices of local coarse problem */ 529 Mat S_VV,S_CV,S_VC,S_CC; 530 /* working matrices */ 531 Mat C_CR; 532 /* additional working stuff */ 533 PC pc_R; 534 Mat F; 535 PetscBool isLU,isCHOL,isILU; 536 537 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 538 PetscScalar *work; 539 PetscInt *idx_V_B; 540 PetscInt n,n_vertices,n_constraints; 541 PetscInt i,n_R,n_D,n_B; 542 PetscBool unsymmetric_check; 543 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 544 MatType impMatType; 545 /* some shortcuts to scalars */ 546 PetscScalar one=1.0,m_one=-1.0; 547 548 PetscFunctionBegin; 549 n_vertices = pcbddc->n_vertices; 550 n_constraints = pcbddc->local_primal_size-n_vertices; 551 /* Set Non-overlapping dimensions */ 552 n_B = pcis->n_B; 553 n_D = pcis->n - n_B; 554 n_R = pcis->n - n_vertices; 555 556 /* Set types for local objects needed by BDDC precondtioner */ 557 impMatType = MATSEQDENSE; 558 559 /* vertices in boundary numbering */ 560 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 561 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 562 if (i != n_vertices) { 563 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 564 } 565 566 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 567 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 568 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 569 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 571 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 572 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 573 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 575 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 576 577 unsymmetric_check = PETSC_FALSE; 578 /* allocate workspace */ 579 n = 0; 580 if (n_constraints) { 581 n += n_R*n_constraints; 582 } 583 if (n_vertices) { 584 n = PetscMax(2*n_R*n_vertices,n); 585 n = PetscMax((n_R+n_B)*n_vertices,n); 586 } 587 if (!pcbddc->symmetric_primal) { 588 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 589 unsymmetric_check = PETSC_TRUE; 590 } 591 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 592 593 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 594 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 595 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 596 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 597 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 598 if (isLU || isILU || isCHOL) { 599 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 600 } else if (sub_schurs->reuse_mumps) { 601 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 602 MatFactorType type; 603 604 F = reuse_mumps->F; 605 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 606 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 607 } else { 608 F = NULL; 609 } 610 611 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 612 if (n_constraints) { 613 Mat M1,M2,M3; 614 Mat auxmat; 615 IS is_aux; 616 PetscScalar *array,*array2; 617 618 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 619 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 620 621 /* Extract constraints on R nodes: C_{CR} */ 622 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 623 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 624 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 625 626 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 627 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 628 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 629 for (i=0;i<n_constraints;i++) { 630 const PetscScalar *row_cmat_values; 631 const PetscInt *row_cmat_indices; 632 PetscInt size_of_constraint,j; 633 634 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 635 for (j=0;j<size_of_constraint;j++) { 636 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 637 } 638 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 639 } 640 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 641 if (F) { 642 Mat B; 643 644 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 645 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 646 ierr = MatDestroy(&B);CHKERRQ(ierr); 647 } else { 648 PetscScalar *marr; 649 650 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 651 for (i=0;i<n_constraints;i++) { 652 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 653 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 654 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 655 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 656 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 657 } 658 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 659 } 660 if (!pcbddc->switch_static) { 661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 662 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 663 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 664 for (i=0;i<n_constraints;i++) { 665 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 666 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 667 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 669 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 670 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 671 } 672 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 673 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 674 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 675 } else { 676 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 677 pcbddc->local_auxmat2 = local_auxmat2_R; 678 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 679 } 680 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 681 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 682 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 683 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 684 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 685 if (isCHOL) { 686 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 687 } else { 688 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 689 } 690 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 691 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 692 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 693 ierr = MatDestroy(&M2);CHKERRQ(ierr); 694 ierr = MatDestroy(&M3);CHKERRQ(ierr); 695 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 696 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 697 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 698 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 699 ierr = MatDestroy(&M1);CHKERRQ(ierr); 700 } 701 /* Get submatrices from subdomain matrix */ 702 if (n_vertices) { 703 Mat newmat; 704 IS is_aux; 705 PetscInt ibs,mbs; 706 PetscBool issbaij; 707 708 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 709 IS tis; 710 711 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 712 ierr = ISSort(tis);CHKERRQ(ierr); 713 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 714 ierr = ISDestroy(&tis);CHKERRQ(ierr); 715 } else { 716 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 717 } 718 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 719 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 720 if (ibs != mbs) { /* need to convert to SEQAIJ */ 721 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 722 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 723 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 724 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 725 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 726 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 727 } else { 728 /* this is safe */ 729 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 730 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 731 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 732 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 733 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 734 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 735 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 736 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 737 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 738 } else { 739 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 740 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 741 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 742 } 743 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 744 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 745 } 746 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 747 } 748 749 /* Matrix of coarse basis functions (local) */ 750 if (pcbddc->coarse_phi_B) { 751 PetscInt on_B,on_primal,on_D=n_D; 752 if (pcbddc->coarse_phi_D) { 753 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 754 } 755 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 756 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 757 PetscScalar *marray; 758 759 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 760 ierr = PetscFree(marray);CHKERRQ(ierr); 761 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 762 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 763 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 764 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 765 } 766 } 767 768 if (!pcbddc->coarse_phi_B) { 769 PetscScalar *marray; 770 771 n = n_B*pcbddc->local_primal_size; 772 if (pcbddc->switch_static || pcbddc->dbg_flag) { 773 n += n_D*pcbddc->local_primal_size; 774 } 775 if (!pcbddc->symmetric_primal) { 776 n *= 2; 777 } 778 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 780 n = n_B*pcbddc->local_primal_size; 781 if (pcbddc->switch_static || pcbddc->dbg_flag) { 782 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 783 n += n_D*pcbddc->local_primal_size; 784 } 785 if (!pcbddc->symmetric_primal) { 786 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 787 if (pcbddc->switch_static || pcbddc->dbg_flag) { 788 n = n_B*pcbddc->local_primal_size; 789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 793 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 794 if (pcbddc->switch_static || pcbddc->dbg_flag) { 795 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 796 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 797 } 798 } 799 } 800 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 801 /* vertices */ 802 if (n_vertices) { 803 804 if (n_R) { 805 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 806 PetscBLASInt B_N,B_one = 1; 807 PetscScalar *x,*y; 808 809 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 810 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 811 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 812 if (F) { /* TODO could be optimized for symmetric problems */ 813 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 814 } else { 815 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 816 for (i=0;i<n_vertices;i++) { 817 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 818 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 819 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 820 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 821 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 822 } 823 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 824 } 825 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 826 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 827 if (n_constraints) { 828 Mat B; 829 830 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 831 for (i=0;i<n_vertices;i++) { 832 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 837 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 838 } 839 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 840 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 841 ierr = MatDestroy(&B);CHKERRQ(ierr); 842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 843 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 844 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 845 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 846 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 847 ierr = MatDestroy(&B);CHKERRQ(ierr); 848 } 849 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 850 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 851 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 852 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 853 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 854 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 855 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 856 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 857 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 858 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 859 } else { 860 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 861 } 862 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 863 /* coarse basis functions */ 864 for (i=0;i<n_vertices;i++) { 865 PetscScalar *y; 866 867 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 868 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 869 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 870 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 y[n_B*i+idx_V_B[i]] = 1.0; 873 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 874 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 875 876 if (pcbddc->switch_static || pcbddc->dbg_flag) { 877 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 878 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 879 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 882 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 883 } 884 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 885 } 886 } 887 888 if (n_constraints) { 889 Mat B; 890 891 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 892 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 893 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 894 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 895 if (n_vertices) { 896 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 897 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 898 } else { 899 Mat S_VCt; 900 901 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 902 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 903 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 904 } 905 } 906 ierr = MatDestroy(&B);CHKERRQ(ierr); 907 /* coarse basis functions */ 908 for (i=0;i<n_constraints;i++) { 909 PetscScalar *y; 910 911 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 912 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 917 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 918 if (pcbddc->switch_static || pcbddc->dbg_flag) { 919 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 920 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 921 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 924 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 925 } 926 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 927 } 928 } 929 if (n_constraints) { 930 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 931 } 932 933 /* compute other basis functions for non-symmetric problems */ 934 if (!pcbddc->symmetric_primal) { 935 936 if (n_constraints) { 937 Mat S_CCT,B_C; 938 939 /* this is a lazy thing */ 940 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 942 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 943 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 944 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 945 if (n_vertices) { 946 Mat B_V,S_VCT; 947 948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 949 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 950 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 951 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 952 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 953 } 954 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 955 } else { /* if there are no constraints, reset work */ 956 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 957 } 958 if (n_vertices && n_R) { 959 Mat A_VRT; 960 PetscScalar *marray; 961 PetscBLASInt B_N,B_one = 1; 962 963 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 964 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 965 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 966 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 967 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 968 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 969 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 970 } 971 972 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 973 for (i=0;i<pcbddc->local_primal_size;i++) { 974 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 975 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 976 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 977 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 978 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 979 } 980 } else { 981 for (i=0;i<pcbddc->local_primal_size;i++) { 982 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 983 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 984 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 985 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 986 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 987 } 988 } 989 /* coarse basis functions */ 990 for (i=0;i<pcbddc->local_primal_size;i++) { 991 PetscScalar *y; 992 993 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 994 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 995 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 996 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 if (i<n_vertices) { 999 y[n_B*i+idx_V_B[i]] = 1.0; 1000 } 1001 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1002 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1003 1004 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1005 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1006 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1007 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1010 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1011 } 1012 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1013 } 1014 } 1015 /* free memory */ 1016 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1017 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1018 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1019 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1020 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1021 ierr = PetscFree(work);CHKERRQ(ierr); 1022 if (n_vertices) { 1023 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1024 } 1025 if (n_constraints) { 1026 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1027 } 1028 /* Checking coarse_sub_mat and coarse basis functios */ 1029 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1030 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1031 if (pcbddc->dbg_flag) { 1032 Mat coarse_sub_mat; 1033 Mat AUXMAT,TM1,TM2,TM3,TM4; 1034 Mat coarse_phi_D,coarse_phi_B; 1035 Mat coarse_psi_D,coarse_psi_B; 1036 Mat A_II,A_BB,A_IB,A_BI; 1037 Mat C_B,CPHI; 1038 IS is_dummy; 1039 Vec mones; 1040 MatType checkmattype=MATSEQAIJ; 1041 PetscReal real_value; 1042 1043 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1044 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1045 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1046 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1047 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1048 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1049 if (unsymmetric_check) { 1050 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1051 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1052 } 1053 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1054 1055 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1056 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1057 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1058 if (unsymmetric_check) { 1059 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1060 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1061 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1063 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1064 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1066 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1067 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1069 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1070 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1071 } else { 1072 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1073 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1074 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1075 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1076 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1077 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1078 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1079 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1080 } 1081 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1082 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1083 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1084 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1085 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1086 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1087 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1089 1090 /* check constraints */ 1091 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1092 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1093 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1094 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1095 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1096 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1097 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1098 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1099 if (unsymmetric_check) { 1100 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1101 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1102 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1103 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1105 } 1106 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1107 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1108 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1109 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1110 1111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1112 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1113 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1114 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1115 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1116 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1117 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1118 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1119 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1120 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1121 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1122 if (unsymmetric_check) { 1123 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1124 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1125 } 1126 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1127 } 1128 /* get back data */ 1129 *coarse_submat_vals_n = coarse_submat_vals; 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1135 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1136 { 1137 Mat *work_mat; 1138 IS isrow_s,iscol_s; 1139 PetscBool rsorted,csorted; 1140 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1141 PetscErrorCode ierr; 1142 1143 PetscFunctionBegin; 1144 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1145 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1146 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1147 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1148 1149 if (!rsorted) { 1150 const PetscInt *idxs; 1151 PetscInt *idxs_sorted,i; 1152 1153 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1154 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1155 for (i=0;i<rsize;i++) { 1156 idxs_perm_r[i] = i; 1157 } 1158 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1159 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1160 for (i=0;i<rsize;i++) { 1161 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1162 } 1163 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1164 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1165 } else { 1166 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1167 isrow_s = isrow; 1168 } 1169 1170 if (!csorted) { 1171 if (isrow == iscol) { 1172 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1173 iscol_s = isrow_s; 1174 } else { 1175 const PetscInt *idxs; 1176 PetscInt *idxs_sorted,i; 1177 1178 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1179 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1180 for (i=0;i<csize;i++) { 1181 idxs_perm_c[i] = i; 1182 } 1183 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1184 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1185 for (i=0;i<csize;i++) { 1186 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1187 } 1188 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1189 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1190 } 1191 } else { 1192 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1193 iscol_s = iscol; 1194 } 1195 1196 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1197 1198 if (!rsorted || !csorted) { 1199 Mat new_mat; 1200 IS is_perm_r,is_perm_c; 1201 1202 if (!rsorted) { 1203 PetscInt *idxs_r,i; 1204 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1205 for (i=0;i<rsize;i++) { 1206 idxs_r[idxs_perm_r[i]] = i; 1207 } 1208 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1209 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1210 } else { 1211 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1212 } 1213 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1214 1215 if (!csorted) { 1216 if (isrow_s == iscol_s) { 1217 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1218 is_perm_c = is_perm_r; 1219 } else { 1220 PetscInt *idxs_c,i; 1221 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1222 for (i=0;i<csize;i++) { 1223 idxs_c[idxs_perm_c[i]] = i; 1224 } 1225 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1226 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1227 } 1228 } else { 1229 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1230 } 1231 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1232 1233 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1234 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1235 work_mat[0] = new_mat; 1236 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1237 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1238 } 1239 1240 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1241 *B = work_mat[0]; 1242 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1243 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1244 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1250 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1251 { 1252 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1253 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1254 Mat new_mat; 1255 IS is_local,is_global; 1256 PetscInt local_size; 1257 PetscBool isseqaij; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1262 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1263 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1264 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1265 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1266 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1267 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1268 1269 /* check */ 1270 if (pcbddc->dbg_flag) { 1271 Vec x,x_change; 1272 PetscReal error; 1273 1274 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1275 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1276 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1277 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1279 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1280 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1281 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1282 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1283 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1286 ierr = VecDestroy(&x);CHKERRQ(ierr); 1287 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1288 } 1289 1290 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1291 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1292 if (isseqaij) { 1293 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1294 } else { 1295 Mat work_mat; 1296 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1297 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1298 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1299 } 1300 if (matis->A->symmetric_set) { 1301 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1302 #if !defined(PETSC_USE_COMPLEX) 1303 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1304 #endif 1305 } 1306 /* 1307 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1308 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1309 */ 1310 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1311 PetscFunctionReturn(0); 1312 } 1313 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1316 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1317 { 1318 PC_IS* pcis = (PC_IS*)(pc->data); 1319 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1320 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1321 PetscInt *idx_R_local=NULL; 1322 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1323 PetscInt vbs,bs; 1324 PetscBT bitmask=NULL; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 /* 1329 No need to setup local scatters if 1330 - primal space is unchanged 1331 AND 1332 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1333 AND 1334 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1335 */ 1336 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1337 PetscFunctionReturn(0); 1338 } 1339 /* destroy old objects */ 1340 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1341 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1342 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1343 /* Set Non-overlapping dimensions */ 1344 n_B = pcis->n_B; 1345 n_D = pcis->n - n_B; 1346 n_vertices = pcbddc->n_vertices; 1347 1348 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1349 1350 /* create auxiliary bitmask and allocate workspace */ 1351 if (!sub_schurs->reuse_mumps) { 1352 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1353 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1354 for (i=0;i<n_vertices;i++) { 1355 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1356 } 1357 1358 for (i=0, n_R=0; i<pcis->n; i++) { 1359 if (!PetscBTLookup(bitmask,i)) { 1360 idx_R_local[n_R++] = i; 1361 } 1362 } 1363 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1364 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1365 1366 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1367 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1368 } 1369 1370 /* Block code */ 1371 vbs = 1; 1372 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1373 if (bs>1 && !(n_vertices%bs)) { 1374 PetscBool is_blocked = PETSC_TRUE; 1375 PetscInt *vary; 1376 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1377 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1378 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1379 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1380 for (i=0; i<n_vertices/bs; i++) { 1381 if (vary[i]!=0 && vary[i]!=bs) { 1382 is_blocked = PETSC_FALSE; 1383 break; 1384 } 1385 } 1386 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1387 vbs = bs; 1388 for (i=0;i<n_R/vbs;i++) { 1389 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1390 } 1391 } 1392 ierr = PetscFree(vary);CHKERRQ(ierr); 1393 } 1394 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1395 if (sub_schurs->reuse_mumps) { 1396 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1397 1398 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1399 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1400 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1401 reuse_mumps->is_R = pcbddc->is_R_local; 1402 } else { 1403 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1404 } 1405 1406 /* print some info if requested */ 1407 if (pcbddc->dbg_flag) { 1408 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1409 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1410 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1414 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1415 } 1416 1417 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1418 if (!sub_schurs->reuse_mumps) { 1419 IS is_aux1,is_aux2; 1420 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1421 1422 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1423 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1424 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1425 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1426 for (i=0; i<n_D; i++) { 1427 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1428 } 1429 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1430 for (i=0, j=0; i<n_R; i++) { 1431 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1432 aux_array1[j++] = i; 1433 } 1434 } 1435 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1436 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1437 for (i=0, j=0; i<n_B; i++) { 1438 if (!PetscBTLookup(bitmask,is_indices[i])) { 1439 aux_array2[j++] = i; 1440 } 1441 } 1442 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1443 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1444 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1445 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1446 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1447 1448 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1449 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1450 for (i=0, j=0; i<n_R; i++) { 1451 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1452 aux_array1[j++] = i; 1453 } 1454 } 1455 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1456 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1457 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1458 } 1459 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1460 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1461 } else { 1462 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1463 IS tis; 1464 PetscInt schur_size; 1465 1466 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1467 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1468 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1469 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1470 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1471 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1472 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1473 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1474 } 1475 } 1476 PetscFunctionReturn(0); 1477 } 1478 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1482 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1483 { 1484 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1485 PC_IS *pcis = (PC_IS*)pc->data; 1486 PC pc_temp; 1487 Mat A_RR; 1488 MatReuse reuse; 1489 PetscScalar m_one = -1.0; 1490 PetscReal value; 1491 PetscInt n_D,n_R,ibs,mbs; 1492 PetscBool use_exact,use_exact_reduced,issbaij; 1493 PetscErrorCode ierr; 1494 /* prefixes stuff */ 1495 char dir_prefix[256],neu_prefix[256],str_level[16]; 1496 size_t len; 1497 1498 PetscFunctionBegin; 1499 1500 /* compute prefixes */ 1501 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1502 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1503 if (!pcbddc->current_level) { 1504 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1505 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1506 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1507 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1508 } else { 1509 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1510 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1511 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1512 len -= 15; /* remove "pc_bddc_coarse_" */ 1513 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1514 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1515 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1516 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1517 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1518 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1519 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1520 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1521 } 1522 1523 /* DIRICHLET PROBLEM */ 1524 if (dirichlet) { 1525 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1526 if (pcbddc->local_mat->symmetric_set) { 1527 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1528 } 1529 /* Matrix for Dirichlet problem is pcis->A_II */ 1530 n_D = pcis->n - pcis->n_B; 1531 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1532 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1533 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1534 /* default */ 1535 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1536 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1537 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1538 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1539 if (issbaij) { 1540 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1541 } else { 1542 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1543 } 1544 /* Allow user's customization */ 1545 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1546 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1547 } 1548 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1549 if (sub_schurs->reuse_mumps) { 1550 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1551 1552 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1553 } 1554 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1555 if (!n_D) { 1556 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1557 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1558 } 1559 /* Set Up KSP for Dirichlet problem of BDDC */ 1560 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1561 /* set ksp_D into pcis data */ 1562 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1563 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1564 pcis->ksp_D = pcbddc->ksp_D; 1565 } 1566 1567 /* NEUMANN PROBLEM */ 1568 A_RR = 0; 1569 if (neumann) { 1570 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1571 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1572 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1573 if (pcbddc->ksp_R) { /* already created ksp */ 1574 PetscInt nn_R; 1575 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1576 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1577 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1578 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1579 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1580 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1581 reuse = MAT_INITIAL_MATRIX; 1582 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1583 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1584 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1585 reuse = MAT_INITIAL_MATRIX; 1586 } else { /* safe to reuse the matrix */ 1587 reuse = MAT_REUSE_MATRIX; 1588 } 1589 } 1590 /* last check */ 1591 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1592 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1593 reuse = MAT_INITIAL_MATRIX; 1594 } 1595 } else { /* first time, so we need to create the matrix */ 1596 reuse = MAT_INITIAL_MATRIX; 1597 } 1598 /* extract A_RR */ 1599 if (!sub_schurs->reuse_mumps) { 1600 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1601 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1602 if (ibs != mbs) { 1603 Mat newmat; 1604 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1605 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1606 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1607 } else { 1608 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1609 } 1610 if (pcbddc->local_mat->symmetric_set) { 1611 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1612 } 1613 } else { 1614 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1615 1616 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1617 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1618 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1619 } 1620 if (!pcbddc->ksp_R) { /* create object if not present */ 1621 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1622 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1623 /* default */ 1624 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1625 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1626 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1627 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1628 if (issbaij) { 1629 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1630 } else { 1631 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1632 } 1633 /* Allow user's customization */ 1634 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1635 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1636 } 1637 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1638 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1639 if (!n_R) { 1640 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1641 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1642 } 1643 /* Reuse MUMPS solver if it is present */ 1644 if (sub_schurs->reuse_mumps) { 1645 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1646 1647 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1648 } 1649 /* Set Up KSP for Neumann problem of BDDC */ 1650 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1651 } 1652 /* free Neumann problem's matrix */ 1653 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1654 1655 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1656 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1657 if (pcbddc->dbg_flag) { 1658 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1659 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1660 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1661 } 1662 if (dirichlet) { /* Dirichlet */ 1663 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1664 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1665 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1666 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1667 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1668 /* need to be adapted? */ 1669 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1670 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1671 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1672 /* print info */ 1673 if (pcbddc->dbg_flag) { 1674 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1676 } 1677 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1678 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1679 } 1680 } 1681 if (neumann) { /* Neumann */ 1682 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1683 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1684 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1685 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1686 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1687 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1688 /* need to be adapted? */ 1689 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1690 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1691 /* print info */ 1692 if (pcbddc->dbg_flag) { 1693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1694 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1695 } 1696 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1697 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1698 } 1699 } 1700 } 1701 PetscFunctionReturn(0); 1702 } 1703 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1706 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1707 { 1708 PetscErrorCode ierr; 1709 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1710 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1711 1712 PetscFunctionBegin; 1713 if (!sub_schurs->reuse_mumps) { 1714 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1715 } 1716 if (!pcbddc->switch_static) { 1717 if (applytranspose && pcbddc->local_auxmat1) { 1718 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1719 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1720 } 1721 if (!sub_schurs->reuse_mumps) { 1722 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1723 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1724 } else { 1725 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1726 1727 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1728 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1729 } 1730 } else { 1731 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1732 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1733 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1734 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1735 if (applytranspose && pcbddc->local_auxmat1) { 1736 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1737 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1738 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1739 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1740 } 1741 } 1742 if (!sub_schurs->reuse_mumps) { 1743 if (applytranspose) { 1744 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1745 } else { 1746 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1747 } 1748 #if defined(PETSC_HAVE_MUMPS) 1749 } else { 1750 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1751 1752 if (applytranspose) { 1753 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1754 } else { 1755 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1756 } 1757 #endif 1758 } 1759 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1760 if (!pcbddc->switch_static) { 1761 if (!sub_schurs->reuse_mumps) { 1762 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1763 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1764 } else { 1765 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1766 1767 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1768 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1769 } 1770 if (!applytranspose && pcbddc->local_auxmat1) { 1771 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1772 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1773 } 1774 } else { 1775 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1776 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1778 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 if (!applytranspose && pcbddc->local_auxmat1) { 1780 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1781 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1782 } 1783 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1786 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1787 } 1788 PetscFunctionReturn(0); 1789 } 1790 1791 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1792 #undef __FUNCT__ 1793 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1794 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1795 { 1796 PetscErrorCode ierr; 1797 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1798 PC_IS* pcis = (PC_IS*) (pc->data); 1799 const PetscScalar zero = 0.0; 1800 1801 PetscFunctionBegin; 1802 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1803 if (applytranspose) { 1804 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1805 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1806 } else { 1807 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1808 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1809 } 1810 /* start communications from local primal nodes to rhs of coarse solver */ 1811 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1812 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1813 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1814 1815 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1816 /* TODO remove null space when doing multilevel */ 1817 if (pcbddc->coarse_ksp) { 1818 Vec rhs,sol; 1819 1820 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1821 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1822 if (applytranspose) { 1823 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1824 } else { 1825 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1826 } 1827 } 1828 1829 /* Local solution on R nodes */ 1830 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1831 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1832 } 1833 1834 /* communications from coarse sol to local primal nodes */ 1835 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1836 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1837 1838 /* Sum contributions from two levels */ 1839 if (applytranspose) { 1840 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1841 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1842 } else { 1843 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1844 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1845 } 1846 PetscFunctionReturn(0); 1847 } 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1851 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1852 { 1853 PetscErrorCode ierr; 1854 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1855 PetscScalar *array; 1856 Vec from,to; 1857 1858 PetscFunctionBegin; 1859 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1860 from = pcbddc->coarse_vec; 1861 to = pcbddc->vec1_P; 1862 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1863 Vec tvec; 1864 1865 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1866 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1867 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1868 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1869 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1870 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1871 } 1872 } else { /* from local to global -> put data in coarse right hand side */ 1873 from = pcbddc->vec1_P; 1874 to = pcbddc->coarse_vec; 1875 } 1876 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1882 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1883 { 1884 PetscErrorCode ierr; 1885 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1886 PetscScalar *array; 1887 Vec from,to; 1888 1889 PetscFunctionBegin; 1890 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1891 from = pcbddc->coarse_vec; 1892 to = pcbddc->vec1_P; 1893 } else { /* from local to global -> put data in coarse right hand side */ 1894 from = pcbddc->vec1_P; 1895 to = pcbddc->coarse_vec; 1896 } 1897 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1898 if (smode == SCATTER_FORWARD) { 1899 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1900 Vec tvec; 1901 1902 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1903 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1904 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1905 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1906 } 1907 } else { 1908 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1909 ierr = VecResetArray(from);CHKERRQ(ierr); 1910 } 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 /* uncomment for testing purposes */ 1916 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1919 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1920 { 1921 PetscErrorCode ierr; 1922 PC_IS* pcis = (PC_IS*)(pc->data); 1923 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1924 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1925 /* one and zero */ 1926 PetscScalar one=1.0,zero=0.0; 1927 /* space to store constraints and their local indices */ 1928 PetscScalar *constraints_data; 1929 PetscInt *constraints_idxs,*constraints_idxs_B; 1930 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1931 PetscInt *constraints_n; 1932 /* iterators */ 1933 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1934 /* BLAS integers */ 1935 PetscBLASInt lwork,lierr; 1936 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1937 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1938 /* reuse */ 1939 PetscInt olocal_primal_size,olocal_primal_size_cc; 1940 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1941 /* change of basis */ 1942 PetscBool qr_needed; 1943 PetscBT change_basis,qr_needed_idx; 1944 /* auxiliary stuff */ 1945 PetscInt *nnz,*is_indices; 1946 PetscInt ncc; 1947 /* some quantities */ 1948 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1949 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1950 1951 PetscFunctionBegin; 1952 /* Destroy Mat objects computed previously */ 1953 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1954 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1955 /* save info on constraints from previous setup (if any) */ 1956 olocal_primal_size = pcbddc->local_primal_size; 1957 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1958 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1959 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1960 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1961 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1962 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1963 1964 /* print some info */ 1965 if (pcbddc->dbg_flag) { 1966 IS vertices; 1967 PetscInt nv,nedges,nfaces; 1968 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1969 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1970 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1971 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1973 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1975 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1976 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1977 } 1978 1979 if (!pcbddc->adaptive_selection) { 1980 IS ISForVertices,*ISForFaces,*ISForEdges; 1981 MatNullSpace nearnullsp; 1982 const Vec *nearnullvecs; 1983 Vec *localnearnullsp; 1984 PetscScalar *array; 1985 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1986 PetscBool nnsp_has_cnst; 1987 /* LAPACK working arrays for SVD or POD */ 1988 PetscBool skip_lapack,boolforchange; 1989 PetscScalar *work; 1990 PetscReal *singular_vals; 1991 #if defined(PETSC_USE_COMPLEX) 1992 PetscReal *rwork; 1993 #endif 1994 #if defined(PETSC_MISSING_LAPACK_GESVD) 1995 PetscScalar *temp_basis,*correlation_mat; 1996 #else 1997 PetscBLASInt dummy_int=1; 1998 PetscScalar dummy_scalar=1.; 1999 #endif 2000 2001 /* Get index sets for faces, edges and vertices from graph */ 2002 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2003 /* free unneeded index sets */ 2004 if (!pcbddc->use_vertices) { 2005 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2006 } 2007 if (!pcbddc->use_edges) { 2008 for (i=0;i<n_ISForEdges;i++) { 2009 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2010 } 2011 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2012 n_ISForEdges = 0; 2013 } 2014 if (!pcbddc->use_faces) { 2015 for (i=0;i<n_ISForFaces;i++) { 2016 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2017 } 2018 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2019 n_ISForFaces = 0; 2020 } 2021 2022 #if defined(PETSC_USE_DEBUG) 2023 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2024 Also use_change_of_basis should be consistent among processors */ 2025 if (pcbddc->NullSpace) { 2026 PetscBool tbool[2],gbool[2]; 2027 2028 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2029 pcbddc->use_change_of_basis = PETSC_TRUE; 2030 if (!ISForEdges) { 2031 pcbddc->use_change_on_faces = PETSC_TRUE; 2032 } 2033 } 2034 tbool[0] = pcbddc->use_change_of_basis; 2035 tbool[1] = pcbddc->use_change_on_faces; 2036 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2037 pcbddc->use_change_of_basis = gbool[0]; 2038 pcbddc->use_change_on_faces = gbool[1]; 2039 } 2040 #endif 2041 2042 /* check if near null space is attached to global mat */ 2043 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2044 if (nearnullsp) { 2045 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2046 /* remove any stored info */ 2047 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2048 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2049 /* store information for BDDC solver reuse */ 2050 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2051 pcbddc->onearnullspace = nearnullsp; 2052 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2053 for (i=0;i<nnsp_size;i++) { 2054 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2055 } 2056 } else { /* if near null space is not provided BDDC uses constants by default */ 2057 nnsp_size = 0; 2058 nnsp_has_cnst = PETSC_TRUE; 2059 } 2060 /* get max number of constraints on a single cc */ 2061 max_constraints = nnsp_size; 2062 if (nnsp_has_cnst) max_constraints++; 2063 2064 /* 2065 Evaluate maximum storage size needed by the procedure 2066 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2067 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2068 There can be multiple constraints per connected component 2069 */ 2070 n_vertices = 0; 2071 if (ISForVertices) { 2072 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2073 } 2074 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2075 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2076 2077 total_counts = n_ISForFaces+n_ISForEdges; 2078 total_counts *= max_constraints; 2079 total_counts += n_vertices; 2080 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2081 2082 total_counts = 0; 2083 max_size_of_constraint = 0; 2084 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2085 IS used_is; 2086 if (i<n_ISForEdges) { 2087 used_is = ISForEdges[i]; 2088 } else { 2089 used_is = ISForFaces[i-n_ISForEdges]; 2090 } 2091 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2092 total_counts += j; 2093 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2094 } 2095 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2096 2097 /* get local part of global near null space vectors */ 2098 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2099 for (k=0;k<nnsp_size;k++) { 2100 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2101 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2102 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2103 } 2104 2105 /* whether or not to skip lapack calls */ 2106 skip_lapack = PETSC_TRUE; 2107 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2108 2109 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2110 if (!skip_lapack) { 2111 PetscScalar temp_work; 2112 2113 #if defined(PETSC_MISSING_LAPACK_GESVD) 2114 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2115 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2116 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2117 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2118 #if defined(PETSC_USE_COMPLEX) 2119 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2120 #endif 2121 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2122 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2123 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2124 lwork = -1; 2125 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2126 #if !defined(PETSC_USE_COMPLEX) 2127 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2128 #else 2129 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2130 #endif 2131 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2132 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2133 #else /* on missing GESVD */ 2134 /* SVD */ 2135 PetscInt max_n,min_n; 2136 max_n = max_size_of_constraint; 2137 min_n = max_constraints; 2138 if (max_size_of_constraint < max_constraints) { 2139 min_n = max_size_of_constraint; 2140 max_n = max_constraints; 2141 } 2142 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2143 #if defined(PETSC_USE_COMPLEX) 2144 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2145 #endif 2146 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2147 lwork = -1; 2148 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2149 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2150 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2151 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2152 #if !defined(PETSC_USE_COMPLEX) 2153 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2154 #else 2155 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2156 #endif 2157 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2158 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2159 #endif /* on missing GESVD */ 2160 /* Allocate optimal workspace */ 2161 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2162 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2163 } 2164 /* Now we can loop on constraining sets */ 2165 total_counts = 0; 2166 constraints_idxs_ptr[0] = 0; 2167 constraints_data_ptr[0] = 0; 2168 /* vertices */ 2169 if (n_vertices) { 2170 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2171 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2172 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2173 for (i=0;i<n_vertices;i++) { 2174 constraints_n[total_counts] = 1; 2175 constraints_data[total_counts] = 1.0; 2176 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2177 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2178 total_counts++; 2179 } 2180 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2181 PetscBool used_vertex; 2182 for (i=0;i<n_vertices;i++) { 2183 used_vertex = PETSC_FALSE; 2184 k = 0; 2185 while (!used_vertex && k<nnsp_size) { 2186 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2187 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2188 constraints_n[total_counts] = 1; 2189 constraints_idxs[total_counts] = is_indices[i]; 2190 constraints_data[total_counts] = 1.0; 2191 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2192 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2193 total_counts++; 2194 used_vertex = PETSC_TRUE; 2195 } 2196 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2197 k++; 2198 } 2199 } 2200 } 2201 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2202 n_vertices = total_counts; 2203 } 2204 2205 /* edges and faces */ 2206 total_counts_cc = total_counts; 2207 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2208 IS used_is; 2209 PetscBool idxs_copied = PETSC_FALSE; 2210 2211 if (ncc<n_ISForEdges) { 2212 used_is = ISForEdges[ncc]; 2213 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2214 } else { 2215 used_is = ISForFaces[ncc-n_ISForEdges]; 2216 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2217 } 2218 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2219 2220 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2221 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2222 /* change of basis should not be performed on local periodic nodes */ 2223 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2224 if (nnsp_has_cnst) { 2225 PetscScalar quad_value; 2226 2227 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2228 idxs_copied = PETSC_TRUE; 2229 2230 if (!pcbddc->use_nnsp_true) { 2231 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2232 } else { 2233 quad_value = 1.0; 2234 } 2235 for (j=0;j<size_of_constraint;j++) { 2236 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2237 } 2238 temp_constraints++; 2239 total_counts++; 2240 } 2241 for (k=0;k<nnsp_size;k++) { 2242 PetscReal real_value; 2243 PetscScalar *ptr_to_data; 2244 2245 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2246 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2247 for (j=0;j<size_of_constraint;j++) { 2248 ptr_to_data[j] = array[is_indices[j]]; 2249 } 2250 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2251 /* check if array is null on the connected component */ 2252 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2253 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2254 if (real_value > 0.0) { /* keep indices and values */ 2255 temp_constraints++; 2256 total_counts++; 2257 if (!idxs_copied) { 2258 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2259 idxs_copied = PETSC_TRUE; 2260 } 2261 } 2262 } 2263 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2264 valid_constraints = temp_constraints; 2265 if (!pcbddc->use_nnsp_true && temp_constraints) { 2266 if (temp_constraints == 1) { /* just normalize the constraint */ 2267 PetscScalar norm,*ptr_to_data; 2268 2269 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2270 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2271 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2272 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2273 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2274 } else { /* perform SVD */ 2275 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2276 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2277 2278 #if defined(PETSC_MISSING_LAPACK_GESVD) 2279 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2280 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2281 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2282 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2283 from that computed using LAPACKgesvd 2284 -> This is due to a different computation of eigenvectors in LAPACKheev 2285 -> The quality of the POD-computed basis will be the same */ 2286 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2287 /* Store upper triangular part of correlation matrix */ 2288 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2289 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2290 for (j=0;j<temp_constraints;j++) { 2291 for (k=0;k<j+1;k++) { 2292 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2293 } 2294 } 2295 /* compute eigenvalues and eigenvectors of correlation matrix */ 2296 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2297 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2298 #if !defined(PETSC_USE_COMPLEX) 2299 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2300 #else 2301 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2302 #endif 2303 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2304 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2305 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2306 j = 0; 2307 while (j < temp_constraints && singular_vals[j] < tol) j++; 2308 total_counts = total_counts-j; 2309 valid_constraints = temp_constraints-j; 2310 /* scale and copy POD basis into used quadrature memory */ 2311 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2312 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2313 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2314 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2315 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2316 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2317 if (j<temp_constraints) { 2318 PetscInt ii; 2319 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2320 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2321 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2322 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2323 for (k=0;k<temp_constraints-j;k++) { 2324 for (ii=0;ii<size_of_constraint;ii++) { 2325 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2326 } 2327 } 2328 } 2329 #else /* on missing GESVD */ 2330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2331 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2332 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2333 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2334 #if !defined(PETSC_USE_COMPLEX) 2335 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2336 #else 2337 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2338 #endif 2339 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2340 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2341 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2342 k = temp_constraints; 2343 if (k > size_of_constraint) k = size_of_constraint; 2344 j = 0; 2345 while (j < k && singular_vals[k-j-1] < tol) j++; 2346 valid_constraints = k-j; 2347 total_counts = total_counts-temp_constraints+valid_constraints; 2348 #endif /* on missing GESVD */ 2349 } 2350 } 2351 /* update pointers information */ 2352 if (valid_constraints) { 2353 constraints_n[total_counts_cc] = valid_constraints; 2354 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2355 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2356 /* set change_of_basis flag */ 2357 if (boolforchange) { 2358 PetscBTSet(change_basis,total_counts_cc); 2359 } 2360 total_counts_cc++; 2361 } 2362 } 2363 /* free workspace */ 2364 if (!skip_lapack) { 2365 ierr = PetscFree(work);CHKERRQ(ierr); 2366 #if defined(PETSC_USE_COMPLEX) 2367 ierr = PetscFree(rwork);CHKERRQ(ierr); 2368 #endif 2369 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2370 #if defined(PETSC_MISSING_LAPACK_GESVD) 2371 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2372 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2373 #endif 2374 } 2375 for (k=0;k<nnsp_size;k++) { 2376 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2377 } 2378 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2379 /* free index sets of faces, edges and vertices */ 2380 for (i=0;i<n_ISForFaces;i++) { 2381 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2382 } 2383 if (n_ISForFaces) { 2384 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2385 } 2386 for (i=0;i<n_ISForEdges;i++) { 2387 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2388 } 2389 if (n_ISForEdges) { 2390 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2391 } 2392 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2393 } else { 2394 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2395 2396 total_counts = 0; 2397 n_vertices = 0; 2398 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2399 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2400 } 2401 max_constraints = 0; 2402 total_counts_cc = 0; 2403 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2404 total_counts += pcbddc->adaptive_constraints_n[i]; 2405 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2406 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2407 } 2408 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2409 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2410 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2411 constraints_data = pcbddc->adaptive_constraints_data; 2412 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2413 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2414 total_counts_cc = 0; 2415 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2416 if (pcbddc->adaptive_constraints_n[i]) { 2417 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2418 } 2419 } 2420 #if 0 2421 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2422 for (i=0;i<total_counts_cc;i++) { 2423 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2424 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2425 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2426 printf(" %d",constraints_idxs[j]); 2427 } 2428 printf("\n"); 2429 printf("number of cc: %d\n",constraints_n[i]); 2430 } 2431 for (i=0;i<n_vertices;i++) { 2432 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2433 } 2434 for (i=0;i<sub_schurs->n_subs;i++) { 2435 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2436 } 2437 #endif 2438 2439 max_size_of_constraint = 0; 2440 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2441 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2442 /* Change of basis */ 2443 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2444 if (pcbddc->use_change_of_basis) { 2445 for (i=0;i<sub_schurs->n_subs;i++) { 2446 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2447 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2448 } 2449 } 2450 } 2451 } 2452 pcbddc->local_primal_size = total_counts; 2453 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2454 2455 /* map constraints_idxs in boundary numbering */ 2456 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2457 if (i != constraints_idxs_ptr[total_counts_cc]) { 2458 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2459 } 2460 2461 /* Create constraint matrix */ 2462 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2463 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2464 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2465 2466 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2467 /* determine if a QR strategy is needed for change of basis */ 2468 qr_needed = PETSC_FALSE; 2469 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2470 total_primal_vertices=0; 2471 pcbddc->local_primal_size_cc = 0; 2472 for (i=0;i<total_counts_cc;i++) { 2473 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2474 if (size_of_constraint == 1) { 2475 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2476 pcbddc->local_primal_size_cc += 1; 2477 } else if (PetscBTLookup(change_basis,i)) { 2478 for (k=0;k<constraints_n[i];k++) { 2479 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2480 } 2481 pcbddc->local_primal_size_cc += constraints_n[i]; 2482 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2483 PetscBTSet(qr_needed_idx,i); 2484 qr_needed = PETSC_TRUE; 2485 } 2486 } else { 2487 pcbddc->local_primal_size_cc += 1; 2488 } 2489 } 2490 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2491 pcbddc->n_vertices = total_primal_vertices; 2492 /* permute indices in order to have a sorted set of vertices */ 2493 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2494 2495 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2496 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2497 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2498 2499 /* nonzero structure of constraint matrix */ 2500 /* and get reference dof for local constraints */ 2501 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2502 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2503 2504 j = total_primal_vertices; 2505 total_counts = total_primal_vertices; 2506 cum = total_primal_vertices; 2507 for (i=n_vertices;i<total_counts_cc;i++) { 2508 if (!PetscBTLookup(change_basis,i)) { 2509 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2510 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2511 cum++; 2512 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2513 for (k=0;k<constraints_n[i];k++) { 2514 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2515 nnz[j+k] = size_of_constraint; 2516 } 2517 j += constraints_n[i]; 2518 } 2519 } 2520 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2521 ierr = PetscFree(nnz);CHKERRQ(ierr); 2522 2523 /* set values in constraint matrix */ 2524 for (i=0;i<total_primal_vertices;i++) { 2525 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2526 } 2527 total_counts = total_primal_vertices; 2528 for (i=n_vertices;i<total_counts_cc;i++) { 2529 if (!PetscBTLookup(change_basis,i)) { 2530 PetscInt *cols; 2531 2532 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2533 cols = constraints_idxs+constraints_idxs_ptr[i]; 2534 for (k=0;k<constraints_n[i];k++) { 2535 PetscInt row = total_counts+k; 2536 PetscScalar *vals; 2537 2538 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2539 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2540 } 2541 total_counts += constraints_n[i]; 2542 } 2543 } 2544 /* assembling */ 2545 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2546 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2547 2548 /* 2549 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2550 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2551 */ 2552 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2553 if (pcbddc->use_change_of_basis) { 2554 /* dual and primal dofs on a single cc */ 2555 PetscInt dual_dofs,primal_dofs; 2556 /* working stuff for GEQRF */ 2557 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2558 PetscBLASInt lqr_work; 2559 /* working stuff for UNGQR */ 2560 PetscScalar *gqr_work,lgqr_work_t; 2561 PetscBLASInt lgqr_work; 2562 /* working stuff for TRTRS */ 2563 PetscScalar *trs_rhs; 2564 PetscBLASInt Blas_NRHS; 2565 /* pointers for values insertion into change of basis matrix */ 2566 PetscInt *start_rows,*start_cols; 2567 PetscScalar *start_vals; 2568 /* working stuff for values insertion */ 2569 PetscBT is_primal; 2570 PetscInt *aux_primal_numbering_B; 2571 /* matrix sizes */ 2572 PetscInt global_size,local_size; 2573 /* temporary change of basis */ 2574 Mat localChangeOfBasisMatrix; 2575 /* extra space for debugging */ 2576 PetscScalar *dbg_work; 2577 2578 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2579 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2580 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2581 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2582 /* nonzeros for local mat */ 2583 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2584 for (i=0;i<pcis->n;i++) nnz[i]=1; 2585 for (i=n_vertices;i<total_counts_cc;i++) { 2586 if (PetscBTLookup(change_basis,i)) { 2587 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2588 if (PetscBTLookup(qr_needed_idx,i)) { 2589 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2590 } else { 2591 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2592 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2593 } 2594 } 2595 } 2596 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2597 ierr = PetscFree(nnz);CHKERRQ(ierr); 2598 /* Set initial identity in the matrix */ 2599 for (i=0;i<pcis->n;i++) { 2600 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2601 } 2602 2603 if (pcbddc->dbg_flag) { 2604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2606 } 2607 2608 2609 /* Now we loop on the constraints which need a change of basis */ 2610 /* 2611 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2612 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2613 2614 Basic blocks of change of basis matrix T computed by 2615 2616 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2617 2618 | 1 0 ... 0 s_1/S | 2619 | 0 1 ... 0 s_2/S | 2620 | ... | 2621 | 0 ... 1 s_{n-1}/S | 2622 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2623 2624 with S = \sum_{i=1}^n s_i^2 2625 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2626 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2627 2628 - QR decomposition of constraints otherwise 2629 */ 2630 if (qr_needed) { 2631 /* space to store Q */ 2632 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2633 /* first we issue queries for optimal work */ 2634 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2635 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2636 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2637 lqr_work = -1; 2638 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2639 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2640 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2641 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2642 lgqr_work = -1; 2643 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2644 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2645 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2646 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2647 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2648 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2649 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2650 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2651 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2652 /* array to store scaling factors for reflectors */ 2653 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2654 /* array to store rhs and solution of triangular solver */ 2655 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2656 /* allocating workspace for check */ 2657 if (pcbddc->dbg_flag) { 2658 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2659 } 2660 } 2661 /* array to store whether a node is primal or not */ 2662 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2663 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2664 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2665 if (i != total_primal_vertices) { 2666 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2667 } 2668 for (i=0;i<total_primal_vertices;i++) { 2669 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2670 } 2671 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2672 2673 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2674 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2675 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2676 if (PetscBTLookup(change_basis,total_counts)) { 2677 /* get constraint info */ 2678 primal_dofs = constraints_n[total_counts]; 2679 dual_dofs = size_of_constraint-primal_dofs; 2680 2681 if (pcbddc->dbg_flag) { 2682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2683 } 2684 2685 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2686 2687 /* copy quadrature constraints for change of basis check */ 2688 if (pcbddc->dbg_flag) { 2689 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2690 } 2691 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2692 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2693 2694 /* compute QR decomposition of constraints */ 2695 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2696 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2697 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2698 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2699 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2700 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2701 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2702 2703 /* explictly compute R^-T */ 2704 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2705 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2706 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2707 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2709 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2710 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2711 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2712 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2713 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2714 2715 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2716 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2717 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2718 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2719 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2720 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2721 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2722 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2723 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2724 2725 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2726 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2727 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2728 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2729 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2733 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2734 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2735 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2736 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2737 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2738 2739 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2740 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2741 /* insert cols for primal dofs */ 2742 for (j=0;j<primal_dofs;j++) { 2743 start_vals = &qr_basis[j*size_of_constraint]; 2744 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2745 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2746 } 2747 /* insert cols for dual dofs */ 2748 for (j=0,k=0;j<dual_dofs;k++) { 2749 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2750 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2751 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2752 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2753 j++; 2754 } 2755 } 2756 2757 /* check change of basis */ 2758 if (pcbddc->dbg_flag) { 2759 PetscInt ii,jj; 2760 PetscBool valid_qr=PETSC_TRUE; 2761 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2763 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2764 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2765 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2766 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2767 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2768 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2769 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2770 for (jj=0;jj<size_of_constraint;jj++) { 2771 for (ii=0;ii<primal_dofs;ii++) { 2772 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2773 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2774 } 2775 } 2776 if (!valid_qr) { 2777 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2778 for (jj=0;jj<size_of_constraint;jj++) { 2779 for (ii=0;ii<primal_dofs;ii++) { 2780 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2781 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2782 } 2783 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2784 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2785 } 2786 } 2787 } 2788 } else { 2789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2790 } 2791 } 2792 } else { /* simple transformation block */ 2793 PetscInt row,col; 2794 PetscScalar val,norm; 2795 2796 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2797 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2798 for (j=0;j<size_of_constraint;j++) { 2799 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2800 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2801 if (!PetscBTLookup(is_primal,row_B)) { 2802 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2803 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2804 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2805 } else { 2806 for (k=0;k<size_of_constraint;k++) { 2807 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2808 if (row != col) { 2809 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2810 } else { 2811 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2812 } 2813 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2814 } 2815 } 2816 } 2817 if (pcbddc->dbg_flag) { 2818 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2819 } 2820 } 2821 } else { 2822 if (pcbddc->dbg_flag) { 2823 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2824 } 2825 } 2826 } 2827 2828 /* free workspace */ 2829 if (qr_needed) { 2830 if (pcbddc->dbg_flag) { 2831 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2832 } 2833 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2834 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2835 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2836 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2837 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2838 } 2839 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2840 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2841 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2842 2843 /* assembling of global change of variable */ 2844 { 2845 Mat tmat; 2846 PetscInt bs; 2847 2848 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2849 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2850 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2851 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2852 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2853 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2854 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2855 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2856 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2857 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2858 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2859 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2860 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2861 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2862 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2863 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2864 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2865 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2866 } 2867 /* check */ 2868 if (pcbddc->dbg_flag) { 2869 PetscReal error; 2870 Vec x,x_change; 2871 2872 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2873 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2874 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2875 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2876 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2877 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2878 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2879 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2880 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2881 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2882 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2883 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2884 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2885 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2886 ierr = VecDestroy(&x);CHKERRQ(ierr); 2887 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2888 } 2889 2890 /* adapt sub_schurs computed (if any) */ 2891 if (pcbddc->use_deluxe_scaling) { 2892 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2893 if (sub_schurs->S_Ej_all) { 2894 Mat S_new,tmat; 2895 IS is_all_N; 2896 2897 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2898 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2899 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2900 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2901 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2902 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2903 sub_schurs->S_Ej_all = S_new; 2904 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2905 if (sub_schurs->sum_S_Ej_all) { 2906 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2907 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2908 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2909 sub_schurs->sum_S_Ej_all = S_new; 2910 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2911 } 2912 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2913 } 2914 } 2915 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2916 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2917 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2918 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2919 } 2920 2921 /* set up change of basis context */ 2922 if (pcbddc->ChangeOfBasisMatrix) { 2923 PCBDDCChange_ctx change_ctx; 2924 2925 if (!pcbddc->new_global_mat) { 2926 PetscInt global_size,local_size; 2927 2928 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2929 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2930 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2931 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2932 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2933 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2934 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2935 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2936 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2937 } else { 2938 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2939 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2940 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2941 } 2942 if (!pcbddc->user_ChangeOfBasisMatrix) { 2943 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2944 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2945 } else { 2946 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2947 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2948 } 2949 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2950 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2951 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2952 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2953 } 2954 2955 /* check if a new primal space has been introduced */ 2956 pcbddc->new_primal_space_local = PETSC_TRUE; 2957 if (olocal_primal_size == pcbddc->local_primal_size) { 2958 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2959 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2960 if (!pcbddc->new_primal_space_local) { 2961 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2962 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2963 } 2964 } 2965 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2966 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2967 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2968 2969 /* flush dbg viewer */ 2970 if (pcbddc->dbg_flag) { 2971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2972 } 2973 2974 /* free workspace */ 2975 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2976 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2977 if (!pcbddc->adaptive_selection) { 2978 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2979 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2980 } else { 2981 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2982 pcbddc->adaptive_constraints_idxs_ptr, 2983 pcbddc->adaptive_constraints_data_ptr, 2984 pcbddc->adaptive_constraints_idxs, 2985 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2986 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2987 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2988 } 2989 PetscFunctionReturn(0); 2990 } 2991 2992 #undef __FUNCT__ 2993 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2994 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2995 { 2996 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2997 PC_IS *pcis = (PC_IS*)pc->data; 2998 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2999 PetscInt ierr,i,vertex_size,N; 3000 PetscViewer viewer=pcbddc->dbg_viewer; 3001 3002 PetscFunctionBegin; 3003 /* Reset previously computed graph */ 3004 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3005 /* Init local Graph struct */ 3006 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3007 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3008 3009 /* Check validity of the csr graph passed in by the user */ 3010 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3011 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3012 } 3013 3014 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3015 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3016 PetscInt *xadj,*adjncy; 3017 PetscInt nvtxs; 3018 PetscBool flg_row=PETSC_FALSE; 3019 3020 if (pcbddc->use_local_adj) { 3021 3022 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3023 if (flg_row) { 3024 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3025 pcbddc->computed_rowadj = PETSC_TRUE; 3026 } 3027 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3028 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3029 IS is_dummy; 3030 ISLocalToGlobalMapping l2gmap_dummy; 3031 PetscInt j,sum; 3032 PetscInt *cxadj,*cadjncy; 3033 const PetscInt *idxs; 3034 PCBDDCGraph graph; 3035 PetscBT is_on_boundary; 3036 3037 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3038 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3039 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3040 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3041 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3042 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3043 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3044 if (flg_row) { 3045 graph->xadj = xadj; 3046 graph->adjncy = adjncy; 3047 } 3048 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3049 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3050 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3051 3052 if (pcbddc->dbg_flag) { 3053 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3054 for (i=0;i<graph->ncc;i++) { 3055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3056 } 3057 } 3058 3059 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3060 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3061 for (i=0;i<pcis->n_B;i++) { 3062 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3063 } 3064 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3065 3066 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3067 sum = 0; 3068 for (i=0;i<graph->ncc;i++) { 3069 PetscInt sizecc = 0; 3070 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3071 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3072 sizecc++; 3073 } 3074 } 3075 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3076 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3077 cxadj[graph->queue[j]] = sizecc; 3078 } 3079 } 3080 sum += sizecc*sizecc; 3081 } 3082 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3083 sum = 0; 3084 for (i=0;i<pcis->n;i++) { 3085 PetscInt temp = cxadj[i]; 3086 cxadj[i] = sum; 3087 sum += temp; 3088 } 3089 cxadj[pcis->n] = sum; 3090 for (i=0;i<graph->ncc;i++) { 3091 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3092 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3093 PetscInt k,sizecc = 0; 3094 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3095 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3096 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3097 sizecc++; 3098 } 3099 } 3100 } 3101 } 3102 } 3103 if (sum) { 3104 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3105 } else { 3106 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3107 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3108 } 3109 graph->xadj = 0; 3110 graph->adjncy = 0; 3111 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3112 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3113 } 3114 } 3115 if (pcbddc->dbg_flag) { 3116 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3117 } 3118 3119 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3120 vertex_size = 1; 3121 if (pcbddc->user_provided_isfordofs) { 3122 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3123 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3124 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3125 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3126 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3127 } 3128 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3129 pcbddc->n_ISForDofs = 0; 3130 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3131 } 3132 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3133 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3134 } else { 3135 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3136 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3137 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3138 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3139 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3140 } 3141 } 3142 } 3143 3144 /* Setup of Graph */ 3145 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3146 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3147 } 3148 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3149 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3150 } 3151 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3152 3153 /* Graph's connected components analysis */ 3154 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3155 3156 /* print some info to stdout */ 3157 if (pcbddc->dbg_flag) { 3158 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3159 } 3160 3161 /* mark topography has done */ 3162 pcbddc->recompute_topography = PETSC_FALSE; 3163 PetscFunctionReturn(0); 3164 } 3165 3166 #undef __FUNCT__ 3167 #define __FUNCT__ "PCBDDCSubsetNumbering" 3168 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3169 { 3170 Vec local_vec,global_vec; 3171 IS seqis,paris; 3172 VecScatter scatter_ctx; 3173 PetscScalar *array; 3174 PetscInt *temp_global_dofs; 3175 PetscScalar globalsum; 3176 PetscInt i,j,s; 3177 PetscInt nlocals,first_index,old_index,max_local,max_global; 3178 PetscMPIInt rank_prec_comm,size_prec_comm; 3179 PetscInt *dof_sizes,*dof_displs; 3180 PetscBool first_found; 3181 PetscErrorCode ierr; 3182 3183 PetscFunctionBegin; 3184 /* mpi buffers */ 3185 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3186 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3187 j = ( !rank_prec_comm ? size_prec_comm : 0); 3188 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3189 /* get maximum size of subset */ 3190 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3191 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3192 max_local = 0; 3193 for (i=0;i<n_local_dofs;i++) { 3194 if (max_local < temp_global_dofs[i] ) { 3195 max_local = temp_global_dofs[i]; 3196 } 3197 } 3198 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3199 max_global++; 3200 max_local = 0; 3201 for (i=0;i<n_local_dofs;i++) { 3202 if (max_local < local_dofs[i] ) { 3203 max_local = local_dofs[i]; 3204 } 3205 } 3206 max_local++; 3207 /* allocate workspace */ 3208 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3209 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3210 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3211 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3212 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3213 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3214 /* create scatter */ 3215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3216 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3217 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3218 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3219 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3220 /* init array */ 3221 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3222 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3223 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3224 if (local_dofs_mult) { 3225 for (i=0;i<n_local_dofs;i++) { 3226 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3227 } 3228 } else { 3229 for (i=0;i<n_local_dofs;i++) { 3230 array[local_dofs[i]]=1.0; 3231 } 3232 } 3233 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3234 /* scatter into global vec and get total number of global dofs */ 3235 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3236 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3237 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3238 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3239 /* Fill global_vec with cumulative function for global numbering */ 3240 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3241 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3242 nlocals = 0; 3243 first_index = -1; 3244 first_found = PETSC_FALSE; 3245 for (i=0;i<s;i++) { 3246 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3247 first_found = PETSC_TRUE; 3248 first_index = i; 3249 } 3250 nlocals += (PetscInt)PetscRealPart(array[i]); 3251 } 3252 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3253 if (!rank_prec_comm) { 3254 dof_displs[0]=0; 3255 for (i=1;i<size_prec_comm;i++) { 3256 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3257 } 3258 } 3259 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3260 if (first_found) { 3261 array[first_index] += (PetscScalar)nlocals; 3262 old_index = first_index; 3263 for (i=first_index+1;i<s;i++) { 3264 if (PetscRealPart(array[i]) > 0.1) { 3265 array[i] += array[old_index]; 3266 old_index = i; 3267 } 3268 } 3269 } 3270 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3271 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3272 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3273 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3274 /* get global ordering of local dofs */ 3275 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3276 if (local_dofs_mult) { 3277 for (i=0;i<n_local_dofs;i++) { 3278 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3279 } 3280 } else { 3281 for (i=0;i<n_local_dofs;i++) { 3282 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3283 } 3284 } 3285 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3286 /* free workspace */ 3287 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3288 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3289 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3290 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3291 /* return pointer to global ordering of local dofs */ 3292 *global_numbering_subset = temp_global_dofs; 3293 PetscFunctionReturn(0); 3294 } 3295 3296 #undef __FUNCT__ 3297 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3298 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3299 { 3300 PetscInt i,j; 3301 PetscScalar *alphas; 3302 PetscErrorCode ierr; 3303 3304 PetscFunctionBegin; 3305 /* this implements stabilized Gram-Schmidt */ 3306 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3307 for (i=0;i<n;i++) { 3308 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3309 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3310 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3311 } 3312 ierr = PetscFree(alphas);CHKERRQ(ierr); 3313 PetscFunctionReturn(0); 3314 } 3315 3316 #undef __FUNCT__ 3317 #define __FUNCT__ "MatISGetSubassemblingPattern" 3318 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3319 { 3320 Mat subdomain_adj; 3321 IS new_ranks,ranks_send_to; 3322 MatPartitioning partitioner; 3323 Mat_IS *matis; 3324 PetscInt n_neighs,*neighs,*n_shared,**shared; 3325 PetscInt prank; 3326 PetscMPIInt size,rank,color; 3327 PetscInt *xadj,*adjncy,*oldranks; 3328 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3329 PetscInt i,local_size,threshold=0; 3330 PetscErrorCode ierr; 3331 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3332 PetscSubcomm subcomm; 3333 3334 PetscFunctionBegin; 3335 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3336 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3337 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3338 3339 /* Get info on mapping */ 3340 matis = (Mat_IS*)(mat->data); 3341 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3342 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3343 3344 /* build local CSR graph of subdomains' connectivity */ 3345 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3346 xadj[0] = 0; 3347 xadj[1] = PetscMax(n_neighs-1,0); 3348 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3349 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3350 3351 if (threshold) { 3352 PetscInt xadj_count = 0; 3353 for (i=1;i<n_neighs;i++) { 3354 if (n_shared[i] > threshold) { 3355 adjncy[xadj_count] = neighs[i]; 3356 adjncy_wgt[xadj_count] = n_shared[i]; 3357 xadj_count++; 3358 } 3359 } 3360 xadj[1] = xadj_count; 3361 } else { 3362 if (xadj[1]) { 3363 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3364 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3365 } 3366 } 3367 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3368 if (use_square) { 3369 for (i=0;i<xadj[1];i++) { 3370 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3371 } 3372 } 3373 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3374 3375 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3376 3377 /* 3378 Restrict work on active processes only. 3379 */ 3380 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3381 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3382 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3383 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3384 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3385 if (color) { 3386 ierr = PetscFree(xadj);CHKERRQ(ierr); 3387 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3388 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3389 } else { 3390 PetscInt coarsening_ratio; 3391 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3392 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3393 prank = rank; 3394 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3395 /* 3396 for (i=0;i<size;i++) { 3397 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3398 } 3399 */ 3400 for (i=0;i<xadj[1];i++) { 3401 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3402 } 3403 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3404 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3405 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3406 3407 /* Partition */ 3408 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3409 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3410 if (use_vwgt) { 3411 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3412 v_wgt[0] = local_size; 3413 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3414 } 3415 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3416 coarsening_ratio = size/n_subdomains; 3417 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3418 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3419 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3420 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3421 3422 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3423 if (contiguous) { 3424 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3425 } else { 3426 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3427 } 3428 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3429 /* clean up */ 3430 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3431 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3432 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3433 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3434 } 3435 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3436 3437 /* assemble parallel IS for sends */ 3438 i = 1; 3439 if (color) i=0; 3440 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3441 3442 /* get back IS */ 3443 *is_sends = ranks_send_to; 3444 PetscFunctionReturn(0); 3445 } 3446 3447 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3448 3449 #undef __FUNCT__ 3450 #define __FUNCT__ "MatISSubassemble" 3451 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3452 { 3453 Mat local_mat; 3454 Mat_IS *matis; 3455 IS is_sends_internal; 3456 PetscInt rows,cols,new_local_rows; 3457 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3458 PetscBool ismatis,isdense,newisdense,destroy_mat; 3459 ISLocalToGlobalMapping l2gmap; 3460 PetscInt* l2gmap_indices; 3461 const PetscInt* is_indices; 3462 MatType new_local_type; 3463 /* buffers */ 3464 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3465 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3466 PetscInt *recv_buffer_idxs_local; 3467 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3468 /* MPI */ 3469 MPI_Comm comm,comm_n; 3470 PetscSubcomm subcomm; 3471 PetscMPIInt n_sends,n_recvs,commsize; 3472 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3473 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3474 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3475 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3476 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3477 PetscErrorCode ierr; 3478 3479 PetscFunctionBegin; 3480 /* TODO: add missing checks */ 3481 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3482 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3483 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3484 PetscValidLogicalCollectiveInt(mat,nis,7); 3485 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3486 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3487 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3488 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3489 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3490 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3491 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3492 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3493 PetscInt mrows,mcols,mnrows,mncols; 3494 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3495 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3496 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3497 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3498 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3499 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3500 } 3501 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3502 PetscValidLogicalCollectiveInt(mat,bs,0); 3503 /* prepare IS for sending if not provided */ 3504 if (!is_sends) { 3505 PetscBool pcontig = PETSC_TRUE; 3506 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3507 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3508 } else { 3509 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3510 is_sends_internal = is_sends; 3511 } 3512 3513 /* get pointer of MATIS data */ 3514 matis = (Mat_IS*)mat->data; 3515 3516 /* get comm */ 3517 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3518 3519 /* compute number of sends */ 3520 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3521 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3522 3523 /* compute number of receives */ 3524 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3525 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3526 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3527 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3528 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3529 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3530 ierr = PetscFree(iflags);CHKERRQ(ierr); 3531 3532 /* restrict comm if requested */ 3533 subcomm = 0; 3534 destroy_mat = PETSC_FALSE; 3535 if (restrict_comm) { 3536 PetscMPIInt color,subcommsize; 3537 3538 color = 0; 3539 if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */ 3540 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3541 subcommsize = commsize - subcommsize; 3542 /* check if reuse has been requested */ 3543 if (reuse == MAT_REUSE_MATRIX) { 3544 if (*mat_n) { 3545 PetscMPIInt subcommsize2; 3546 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3547 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3548 comm_n = PetscObjectComm((PetscObject)*mat_n); 3549 } else { 3550 comm_n = PETSC_COMM_SELF; 3551 } 3552 } else { /* MAT_INITIAL_MATRIX */ 3553 PetscMPIInt rank; 3554 3555 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3556 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3557 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3558 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3559 comm_n = PetscSubcommChild(subcomm); 3560 } 3561 /* flag to destroy *mat_n if not significative */ 3562 if (color) destroy_mat = PETSC_TRUE; 3563 } else { 3564 comm_n = comm; 3565 } 3566 3567 /* prepare send/receive buffers */ 3568 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3569 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3570 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3571 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3572 if (nis) { 3573 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3574 } 3575 3576 /* Get data from local matrices */ 3577 if (!isdense) { 3578 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3579 /* TODO: See below some guidelines on how to prepare the local buffers */ 3580 /* 3581 send_buffer_vals should contain the raw values of the local matrix 3582 send_buffer_idxs should contain: 3583 - MatType_PRIVATE type 3584 - PetscInt size_of_l2gmap 3585 - PetscInt global_row_indices[size_of_l2gmap] 3586 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3587 */ 3588 } else { 3589 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3590 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3591 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3592 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3593 send_buffer_idxs[1] = i; 3594 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3595 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3596 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3597 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3598 for (i=0;i<n_sends;i++) { 3599 ilengths_vals[is_indices[i]] = len*len; 3600 ilengths_idxs[is_indices[i]] = len+2; 3601 } 3602 } 3603 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3604 /* additional is (if any) */ 3605 if (nis) { 3606 PetscMPIInt psum; 3607 PetscInt j; 3608 for (j=0,psum=0;j<nis;j++) { 3609 PetscInt plen; 3610 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3611 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3612 psum += len+1; /* indices + lenght */ 3613 } 3614 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3615 for (j=0,psum=0;j<nis;j++) { 3616 PetscInt plen; 3617 const PetscInt *is_array_idxs; 3618 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3619 send_buffer_idxs_is[psum] = plen; 3620 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3621 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3622 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3623 psum += plen+1; /* indices + lenght */ 3624 } 3625 for (i=0;i<n_sends;i++) { 3626 ilengths_idxs_is[is_indices[i]] = psum; 3627 } 3628 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3629 } 3630 3631 buf_size_idxs = 0; 3632 buf_size_vals = 0; 3633 buf_size_idxs_is = 0; 3634 for (i=0;i<n_recvs;i++) { 3635 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3636 buf_size_vals += (PetscInt)olengths_vals[i]; 3637 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3638 } 3639 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3640 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3641 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3642 3643 /* get new tags for clean communications */ 3644 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3645 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3646 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3647 3648 /* allocate for requests */ 3649 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3650 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3651 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3652 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3653 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3654 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3655 3656 /* communications */ 3657 ptr_idxs = recv_buffer_idxs; 3658 ptr_vals = recv_buffer_vals; 3659 ptr_idxs_is = recv_buffer_idxs_is; 3660 for (i=0;i<n_recvs;i++) { 3661 source_dest = onodes[i]; 3662 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3663 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3664 ptr_idxs += olengths_idxs[i]; 3665 ptr_vals += olengths_vals[i]; 3666 if (nis) { 3667 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3668 ptr_idxs_is += olengths_idxs_is[i]; 3669 } 3670 } 3671 for (i=0;i<n_sends;i++) { 3672 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3673 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3674 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3675 if (nis) { 3676 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3677 } 3678 } 3679 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3680 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3681 3682 /* assemble new l2g map */ 3683 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3684 ptr_idxs = recv_buffer_idxs; 3685 new_local_rows = 0; 3686 for (i=0;i<n_recvs;i++) { 3687 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3688 ptr_idxs += olengths_idxs[i]; 3689 } 3690 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3691 ptr_idxs = recv_buffer_idxs; 3692 new_local_rows = 0; 3693 for (i=0;i<n_recvs;i++) { 3694 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3695 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3696 ptr_idxs += olengths_idxs[i]; 3697 } 3698 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3699 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3700 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3701 3702 /* infer new local matrix type from received local matrices type */ 3703 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3704 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3705 if (n_recvs) { 3706 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3707 ptr_idxs = recv_buffer_idxs; 3708 for (i=0;i<n_recvs;i++) { 3709 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3710 new_local_type_private = MATAIJ_PRIVATE; 3711 break; 3712 } 3713 ptr_idxs += olengths_idxs[i]; 3714 } 3715 switch (new_local_type_private) { 3716 case MATDENSE_PRIVATE: 3717 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3718 new_local_type = MATSEQAIJ; 3719 bs = 1; 3720 } else { /* if I receive only 1 dense matrix */ 3721 new_local_type = MATSEQDENSE; 3722 bs = 1; 3723 } 3724 break; 3725 case MATAIJ_PRIVATE: 3726 new_local_type = MATSEQAIJ; 3727 bs = 1; 3728 break; 3729 case MATBAIJ_PRIVATE: 3730 new_local_type = MATSEQBAIJ; 3731 break; 3732 case MATSBAIJ_PRIVATE: 3733 new_local_type = MATSEQSBAIJ; 3734 break; 3735 default: 3736 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3737 break; 3738 } 3739 } else { /* by default, new_local_type is seqdense */ 3740 new_local_type = MATSEQDENSE; 3741 bs = 1; 3742 } 3743 3744 /* create MATIS object if needed */ 3745 if (reuse == MAT_INITIAL_MATRIX) { 3746 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3747 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3748 } else { 3749 /* it also destroys the local matrices */ 3750 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3751 } 3752 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3753 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3754 3755 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3756 3757 /* Global to local map of received indices */ 3758 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3759 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3760 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3761 3762 /* restore attributes -> type of incoming data and its size */ 3763 buf_size_idxs = 0; 3764 for (i=0;i<n_recvs;i++) { 3765 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3766 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3767 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3768 } 3769 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3770 3771 /* set preallocation */ 3772 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3773 if (!newisdense) { 3774 PetscInt *new_local_nnz=0; 3775 3776 ptr_vals = recv_buffer_vals; 3777 ptr_idxs = recv_buffer_idxs_local; 3778 if (n_recvs) { 3779 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3780 } 3781 for (i=0;i<n_recvs;i++) { 3782 PetscInt j; 3783 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3784 for (j=0;j<*(ptr_idxs+1);j++) { 3785 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3786 } 3787 } else { 3788 /* TODO */ 3789 } 3790 ptr_idxs += olengths_idxs[i]; 3791 } 3792 if (new_local_nnz) { 3793 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3794 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3795 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3796 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3797 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3798 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3799 } else { 3800 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3801 } 3802 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3803 } else { 3804 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3805 } 3806 3807 /* set values */ 3808 ptr_vals = recv_buffer_vals; 3809 ptr_idxs = recv_buffer_idxs_local; 3810 for (i=0;i<n_recvs;i++) { 3811 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3812 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3813 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3814 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3815 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3816 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3817 } else { 3818 /* TODO */ 3819 } 3820 ptr_idxs += olengths_idxs[i]; 3821 ptr_vals += olengths_vals[i]; 3822 } 3823 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3824 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3825 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3826 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3827 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3828 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3829 3830 #if 0 3831 if (!restrict_comm) { /* check */ 3832 Vec lvec,rvec; 3833 PetscReal infty_error; 3834 3835 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3836 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3837 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3838 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3839 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3840 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3841 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3842 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3843 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3844 } 3845 #endif 3846 3847 /* assemble new additional is (if any) */ 3848 if (nis) { 3849 PetscInt **temp_idxs,*count_is,j,psum; 3850 3851 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3852 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3853 ptr_idxs = recv_buffer_idxs_is; 3854 psum = 0; 3855 for (i=0;i<n_recvs;i++) { 3856 for (j=0;j<nis;j++) { 3857 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3858 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3859 psum += plen; 3860 ptr_idxs += plen+1; /* shift pointer to received data */ 3861 } 3862 } 3863 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3864 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3865 for (i=1;i<nis;i++) { 3866 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3867 } 3868 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3869 ptr_idxs = recv_buffer_idxs_is; 3870 for (i=0;i<n_recvs;i++) { 3871 for (j=0;j<nis;j++) { 3872 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3873 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3874 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3875 ptr_idxs += plen+1; /* shift pointer to received data */ 3876 } 3877 } 3878 for (i=0;i<nis;i++) { 3879 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3880 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3881 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3882 } 3883 ierr = PetscFree(count_is);CHKERRQ(ierr); 3884 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3885 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3886 } 3887 /* free workspace */ 3888 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3889 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3890 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3891 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3892 if (isdense) { 3893 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3894 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3895 } else { 3896 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3897 } 3898 if (nis) { 3899 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3900 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3901 } 3902 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3903 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3904 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3905 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3906 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3907 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3908 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3909 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3910 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3911 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3912 ierr = PetscFree(onodes);CHKERRQ(ierr); 3913 if (nis) { 3914 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3915 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3916 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3917 } 3918 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3919 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3920 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3921 for (i=0;i<nis;i++) { 3922 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3923 } 3924 } 3925 PetscFunctionReturn(0); 3926 } 3927 3928 /* temporary hack into ksp private data structure */ 3929 #include <petsc/private/kspimpl.h> 3930 3931 #undef __FUNCT__ 3932 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3933 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3934 { 3935 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3936 PC_IS *pcis = (PC_IS*)pc->data; 3937 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3938 MatNullSpace CoarseNullSpace=NULL; 3939 ISLocalToGlobalMapping coarse_islg; 3940 IS coarse_is,*isarray; 3941 PetscInt i,im_active=-1,active_procs=-1; 3942 PetscInt nis,nisdofs,nisneu; 3943 PC pc_temp; 3944 PCType coarse_pc_type; 3945 KSPType coarse_ksp_type; 3946 PetscBool multilevel_requested,multilevel_allowed; 3947 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3948 Mat t_coarse_mat_is; 3949 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3950 PetscMPIInt all_procs; 3951 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3952 PetscBool compute_vecs = PETSC_FALSE; 3953 PetscScalar *array; 3954 PetscErrorCode ierr; 3955 3956 PetscFunctionBegin; 3957 /* Assign global numbering to coarse dofs */ 3958 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3959 PetscInt ocoarse_size; 3960 compute_vecs = PETSC_TRUE; 3961 ocoarse_size = pcbddc->coarse_size; 3962 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3963 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3964 /* see if we can avoid some work */ 3965 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3966 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3967 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3968 coarse_reuse = PETSC_FALSE; 3969 } else { /* we can safely reuse already computed coarse matrix */ 3970 coarse_reuse = PETSC_TRUE; 3971 } 3972 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3973 coarse_reuse = PETSC_FALSE; 3974 } 3975 /* reset any subassembling information */ 3976 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3977 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3978 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3979 coarse_reuse = PETSC_TRUE; 3980 } 3981 3982 /* count "active" (i.e. with positive local size) and "void" processes */ 3983 im_active = !!(pcis->n); 3984 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3985 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3986 void_procs = all_procs-active_procs; 3987 csin_type_simple = PETSC_TRUE; 3988 redist = PETSC_FALSE; 3989 if (pcbddc->current_level && void_procs) { 3990 csin_ml = PETSC_TRUE; 3991 ncoarse_ml = void_procs; 3992 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3993 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3994 csin_ds = PETSC_TRUE; 3995 ncoarse_ds = pcbddc->redistribute_coarse; 3996 redist = PETSC_TRUE; 3997 } else { 3998 csin_ds = PETSC_TRUE; 3999 ncoarse_ds = active_procs; 4000 redist = PETSC_TRUE; 4001 } 4002 } else { 4003 csin_ml = PETSC_FALSE; 4004 ncoarse_ml = all_procs; 4005 if (void_procs) { 4006 csin_ds = PETSC_TRUE; 4007 ncoarse_ds = void_procs; 4008 csin_type_simple = PETSC_FALSE; 4009 } else { 4010 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4011 csin_ds = PETSC_TRUE; 4012 ncoarse_ds = pcbddc->redistribute_coarse; 4013 redist = PETSC_TRUE; 4014 } else { 4015 csin_ds = PETSC_FALSE; 4016 ncoarse_ds = all_procs; 4017 } 4018 } 4019 } 4020 4021 /* 4022 test if we can go multilevel: three conditions must be satisfied: 4023 - we have not exceeded the number of levels requested 4024 - we can actually subassemble the active processes 4025 - we can find a suitable number of MPI processes where we can place the subassembled problem 4026 */ 4027 multilevel_allowed = PETSC_FALSE; 4028 multilevel_requested = PETSC_FALSE; 4029 if (pcbddc->current_level < pcbddc->max_levels) { 4030 multilevel_requested = PETSC_TRUE; 4031 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4032 multilevel_allowed = PETSC_FALSE; 4033 } else { 4034 multilevel_allowed = PETSC_TRUE; 4035 } 4036 } 4037 /* determine number of process partecipating to coarse solver */ 4038 if (multilevel_allowed) { 4039 ncoarse = ncoarse_ml; 4040 csin = csin_ml; 4041 redist = PETSC_FALSE; 4042 } else { 4043 ncoarse = ncoarse_ds; 4044 csin = csin_ds; 4045 } 4046 4047 /* creates temporary l2gmap and IS for coarse indexes */ 4048 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4049 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4050 4051 /* creates temporary MATIS object for coarse matrix */ 4052 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4053 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4054 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4055 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4056 #if 0 4057 { 4058 PetscViewer viewer; 4059 char filename[256]; 4060 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4061 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4062 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4063 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4064 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4065 } 4066 #endif 4067 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4068 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4069 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4070 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4071 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4072 4073 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4074 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4075 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4076 const PetscInt *idxs; 4077 ISLocalToGlobalMapping tmap; 4078 4079 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4080 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4081 /* allocate space for temporary storage */ 4082 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4083 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4084 /* allocate for IS array */ 4085 nisdofs = pcbddc->n_ISForDofsLocal; 4086 nisneu = !!pcbddc->NeumannBoundariesLocal; 4087 nis = nisdofs + nisneu; 4088 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4089 /* dofs splitting */ 4090 for (i=0;i<nisdofs;i++) { 4091 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4092 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4093 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4094 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4095 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4096 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4097 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4098 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4099 } 4100 /* neumann boundaries */ 4101 if (pcbddc->NeumannBoundariesLocal) { 4102 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4103 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4104 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4105 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4106 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4107 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4108 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4109 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4110 } 4111 /* free memory */ 4112 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4113 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4114 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4115 } else { 4116 nis = 0; 4117 nisdofs = 0; 4118 nisneu = 0; 4119 isarray = NULL; 4120 } 4121 /* destroy no longer needed map */ 4122 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4123 4124 /* restrict on coarse candidates (if needed) */ 4125 coarse_mat_is = NULL; 4126 if (csin) { 4127 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4128 if (redist) { 4129 PetscMPIInt rank; 4130 PetscInt spc,n_spc_p1,dest[1],destsize; 4131 4132 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4133 spc = active_procs/ncoarse; 4134 n_spc_p1 = active_procs%ncoarse; 4135 if (im_active) { 4136 destsize = 1; 4137 if (rank > n_spc_p1*(spc+1)-1) { 4138 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4139 } else { 4140 dest[0] = rank/(spc+1); 4141 } 4142 } else { 4143 destsize = 0; 4144 } 4145 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4146 } else if (csin_type_simple) { 4147 PetscMPIInt rank; 4148 PetscInt issize,isidx; 4149 4150 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4151 if (im_active) { 4152 issize = 1; 4153 isidx = (PetscInt)rank; 4154 } else { 4155 issize = 0; 4156 isidx = -1; 4157 } 4158 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4159 } else { /* get a suitable subassembling pattern from MATIS code */ 4160 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4161 } 4162 4163 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4164 if (!redist || ncoarse <= void_procs) { 4165 PetscInt ncoarse_cand,tissize,*nisindices; 4166 PetscInt *coarse_candidates; 4167 const PetscInt* tisindices; 4168 4169 /* get coarse candidates' ranks in pc communicator */ 4170 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4171 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4172 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4173 if (!coarse_candidates[i]) { 4174 coarse_candidates[ncoarse_cand++]=i; 4175 } 4176 } 4177 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4178 4179 4180 if (pcbddc->dbg_flag) { 4181 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4183 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4184 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4185 for (i=0;i<ncoarse_cand;i++) { 4186 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4187 } 4188 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4189 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4190 } 4191 /* shift the pattern on coarse candidates */ 4192 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4193 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4194 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4195 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4196 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4197 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4198 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4199 } 4200 if (pcbddc->dbg_flag) { 4201 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4202 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4203 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4204 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4205 } 4206 } 4207 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4208 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4209 } else { 4210 if (pcbddc->dbg_flag) { 4211 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4212 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4213 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4214 } 4215 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4216 coarse_mat_is = t_coarse_mat_is; 4217 } 4218 4219 /* create local to global scatters for coarse problem */ 4220 if (compute_vecs) { 4221 PetscInt lrows; 4222 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4223 if (coarse_mat_is) { 4224 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4225 } else { 4226 lrows = 0; 4227 } 4228 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4229 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4230 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4231 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4232 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4233 } 4234 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4235 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4236 4237 /* set defaults for coarse KSP and PC */ 4238 if (multilevel_allowed) { 4239 coarse_ksp_type = KSPRICHARDSON; 4240 coarse_pc_type = PCBDDC; 4241 } else { 4242 coarse_ksp_type = KSPPREONLY; 4243 coarse_pc_type = PCREDUNDANT; 4244 } 4245 4246 /* print some info if requested */ 4247 if (pcbddc->dbg_flag) { 4248 if (!multilevel_allowed) { 4249 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4250 if (multilevel_requested) { 4251 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4252 } else if (pcbddc->max_levels) { 4253 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4254 } 4255 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4256 } 4257 } 4258 4259 /* create the coarse KSP object only once with defaults */ 4260 if (coarse_mat_is) { 4261 MatReuse coarse_mat_reuse; 4262 PetscViewer dbg_viewer = NULL; 4263 if (pcbddc->dbg_flag) { 4264 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4265 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4266 } 4267 if (!pcbddc->coarse_ksp) { 4268 char prefix[256],str_level[16]; 4269 size_t len; 4270 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4271 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4272 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4273 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4274 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4275 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4276 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4277 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4278 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4279 /* prefix */ 4280 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4281 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4282 if (!pcbddc->current_level) { 4283 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4284 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4285 } else { 4286 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4287 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4288 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4289 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4290 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4291 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4292 } 4293 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4294 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4295 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4296 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4297 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4298 /* allow user customization */ 4299 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4300 } 4301 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4302 if (nisdofs) { 4303 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4304 for (i=0;i<nisdofs;i++) { 4305 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4306 } 4307 } 4308 if (nisneu) { 4309 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4310 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4311 } 4312 4313 /* get some info after set from options */ 4314 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4315 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4316 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4317 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4318 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4319 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4320 isbddc = PETSC_FALSE; 4321 } 4322 if (isredundant) { 4323 KSP inner_ksp; 4324 PC inner_pc; 4325 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4326 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4327 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4328 } 4329 4330 /* assemble coarse matrix */ 4331 if (coarse_reuse) { 4332 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4333 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4334 coarse_mat_reuse = MAT_REUSE_MATRIX; 4335 } else { 4336 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4337 } 4338 if (isbddc || isnn) { 4339 if (pcbddc->coarsening_ratio > 1) { 4340 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4341 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4342 if (pcbddc->dbg_flag) { 4343 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4344 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4345 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4346 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4347 } 4348 } 4349 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4350 } else { 4351 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4352 coarse_mat = coarse_mat_is; 4353 } 4354 } else { 4355 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4356 } 4357 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4358 4359 /* propagate symmetry info of coarse matrix */ 4360 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4361 if (pc->pmat->symmetric_set) { 4362 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4363 } 4364 if (pc->pmat->hermitian_set) { 4365 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4366 } 4367 if (pc->pmat->spd_set) { 4368 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4369 } 4370 /* set operators */ 4371 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4372 if (pcbddc->dbg_flag) { 4373 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4374 } 4375 } else { /* processes non partecipating to coarse solver (if any) */ 4376 coarse_mat = 0; 4377 } 4378 ierr = PetscFree(isarray);CHKERRQ(ierr); 4379 #if 0 4380 { 4381 PetscViewer viewer; 4382 char filename[256]; 4383 sprintf(filename,"coarse_mat.m"); 4384 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4385 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4386 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4387 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4388 } 4389 #endif 4390 4391 /* Compute coarse null space (special handling by BDDC only) */ 4392 if (pcbddc->NullSpace) { 4393 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4394 } 4395 4396 if (pcbddc->coarse_ksp) { 4397 Vec crhs,csol; 4398 PetscBool ispreonly; 4399 if (CoarseNullSpace) { 4400 if (isbddc) { 4401 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4402 } else { 4403 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4404 } 4405 } 4406 /* setup coarse ksp */ 4407 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4408 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4409 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4410 /* hack */ 4411 if (!csol) { 4412 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4413 } 4414 if (!crhs) { 4415 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4416 } 4417 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4418 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4419 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4420 KSP check_ksp; 4421 KSPType check_ksp_type; 4422 PC check_pc; 4423 Vec check_vec,coarse_vec; 4424 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4425 PetscInt its; 4426 PetscBool compute_eigs; 4427 PetscReal *eigs_r,*eigs_c; 4428 PetscInt neigs; 4429 const char *prefix; 4430 4431 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4432 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4433 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4434 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4435 if (ispreonly) { 4436 check_ksp_type = KSPPREONLY; 4437 compute_eigs = PETSC_FALSE; 4438 } else { 4439 check_ksp_type = KSPGMRES; 4440 compute_eigs = PETSC_TRUE; 4441 } 4442 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4443 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4444 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4445 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4446 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4447 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4448 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4449 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4450 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4451 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4452 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4453 /* create random vec */ 4454 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4455 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4456 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4457 if (CoarseNullSpace) { 4458 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4459 } 4460 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4461 /* solve coarse problem */ 4462 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4463 if (CoarseNullSpace) { 4464 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4465 } 4466 /* set eigenvalue estimation if preonly has not been requested */ 4467 if (compute_eigs) { 4468 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4469 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4470 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4471 lambda_max = eigs_r[neigs-1]; 4472 lambda_min = eigs_r[0]; 4473 if (pcbddc->use_coarse_estimates) { 4474 if (lambda_max>lambda_min) { 4475 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4476 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4477 } 4478 } 4479 } 4480 4481 /* check coarse problem residual error */ 4482 if (pcbddc->dbg_flag) { 4483 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4484 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4485 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4486 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4487 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4488 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4489 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4490 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4491 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4492 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4493 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4494 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4495 if (compute_eigs) { 4496 PetscReal lambda_max_s,lambda_min_s; 4497 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4498 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4499 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4500 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4501 for (i=0;i<neigs;i++) { 4502 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4503 } 4504 } 4505 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4506 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4507 } 4508 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4509 if (compute_eigs) { 4510 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4511 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4512 } 4513 } 4514 } 4515 /* print additional info */ 4516 if (pcbddc->dbg_flag) { 4517 /* waits until all processes reaches this point */ 4518 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4519 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4520 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4521 } 4522 4523 /* free memory */ 4524 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4525 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4526 PetscFunctionReturn(0); 4527 } 4528 4529 #undef __FUNCT__ 4530 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4531 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4532 { 4533 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4534 PC_IS* pcis = (PC_IS*)pc->data; 4535 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4536 PetscInt i,local_size,coarse_size=0; 4537 PetscInt *local_primal_indices=NULL; 4538 PetscInt *t_local_primal_indices=NULL; 4539 PetscErrorCode ierr; 4540 4541 PetscFunctionBegin; 4542 /* Compute global number of coarse dofs */ 4543 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4544 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4545 } 4546 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4547 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4548 local_size = 0; 4549 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4550 PetscInt j; 4551 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4552 } 4553 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4554 if (local_size != pcbddc->local_primal_size) { 4555 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4556 } 4557 4558 /* check numbering */ 4559 if (pcbddc->dbg_flag) { 4560 PetscScalar coarsesum,*array; 4561 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4562 4563 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4564 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4565 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4566 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4567 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4568 for (i=0;i<pcbddc->local_primal_size;i++) { 4569 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4570 } 4571 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4572 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4573 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4574 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4575 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4576 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4577 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4578 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4579 for (i=0;i<pcis->n;i++) { 4580 if (array[i] == 1.0) { 4581 set_error = PETSC_TRUE; 4582 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4583 } 4584 } 4585 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4586 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4587 for (i=0;i<pcis->n;i++) { 4588 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4589 } 4590 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4591 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4592 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4593 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4594 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4596 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4597 PetscInt *gidxs; 4598 4599 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4600 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4601 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4602 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4604 for (i=0;i<pcbddc->local_primal_size;i++) { 4605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4606 } 4607 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4608 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4609 } 4610 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4611 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4612 } 4613 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4614 /* get back data */ 4615 *coarse_size_n = coarse_size; 4616 *local_primal_indices_n = local_primal_indices; 4617 PetscFunctionReturn(0); 4618 } 4619 4620 #undef __FUNCT__ 4621 #define __FUNCT__ "PCBDDCGlobalToLocal" 4622 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4623 { 4624 IS localis_t; 4625 PetscInt i,lsize,*idxs,n; 4626 PetscScalar *vals; 4627 PetscErrorCode ierr; 4628 4629 PetscFunctionBegin; 4630 /* get indices in local ordering exploiting local to global map */ 4631 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4632 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4633 for (i=0;i<lsize;i++) vals[i] = 1.0; 4634 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4635 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4636 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4637 if (idxs) { /* multilevel guard */ 4638 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4639 } 4640 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4641 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4642 ierr = PetscFree(vals);CHKERRQ(ierr); 4643 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4644 /* now compute set in local ordering */ 4645 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4646 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4647 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4648 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4649 for (i=0,lsize=0;i<n;i++) { 4650 if (PetscRealPart(vals[i]) > 0.5) { 4651 lsize++; 4652 } 4653 } 4654 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4655 for (i=0,lsize=0;i<n;i++) { 4656 if (PetscRealPart(vals[i]) > 0.5) { 4657 idxs[lsize++] = i; 4658 } 4659 } 4660 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4661 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4662 *localis = localis_t; 4663 PetscFunctionReturn(0); 4664 } 4665 4666 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4667 #undef __FUNCT__ 4668 #define __FUNCT__ "PCBDDCMatMult_Private" 4669 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4670 { 4671 PCBDDCChange_ctx change_ctx; 4672 PetscErrorCode ierr; 4673 4674 PetscFunctionBegin; 4675 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4676 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4677 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4678 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4679 PetscFunctionReturn(0); 4680 } 4681 4682 #undef __FUNCT__ 4683 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4684 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4685 { 4686 PCBDDCChange_ctx change_ctx; 4687 PetscErrorCode ierr; 4688 4689 PetscFunctionBegin; 4690 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4691 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4692 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4693 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4694 PetscFunctionReturn(0); 4695 } 4696 4697 #undef __FUNCT__ 4698 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4699 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4700 { 4701 PC_IS *pcis=(PC_IS*)pc->data; 4702 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4703 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4704 Mat S_j; 4705 PetscInt *used_xadj,*used_adjncy; 4706 PetscBool free_used_adj; 4707 PetscErrorCode ierr; 4708 4709 PetscFunctionBegin; 4710 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4711 free_used_adj = PETSC_FALSE; 4712 if (pcbddc->sub_schurs_layers == -1) { 4713 used_xadj = NULL; 4714 used_adjncy = NULL; 4715 } else { 4716 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4717 used_xadj = pcbddc->mat_graph->xadj; 4718 used_adjncy = pcbddc->mat_graph->adjncy; 4719 } else if (pcbddc->computed_rowadj) { 4720 used_xadj = pcbddc->mat_graph->xadj; 4721 used_adjncy = pcbddc->mat_graph->adjncy; 4722 } else { 4723 PetscBool flg_row=PETSC_FALSE; 4724 const PetscInt *xadj,*adjncy; 4725 PetscInt nvtxs; 4726 4727 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4728 if (flg_row) { 4729 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4730 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4731 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4732 free_used_adj = PETSC_TRUE; 4733 } else { 4734 pcbddc->sub_schurs_layers = -1; 4735 used_xadj = NULL; 4736 used_adjncy = NULL; 4737 } 4738 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4739 } 4740 } 4741 4742 /* setup sub_schurs data */ 4743 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4744 if (!sub_schurs->use_mumps) { 4745 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4746 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4747 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4748 } else { 4749 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4750 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4751 } 4752 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4753 4754 /* free adjacency */ 4755 if (free_used_adj) { 4756 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4757 } 4758 PetscFunctionReturn(0); 4759 } 4760 4761 #undef __FUNCT__ 4762 #define __FUNCT__ "PCBDDCInitSubSchurs" 4763 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4764 { 4765 PC_IS *pcis=(PC_IS*)pc->data; 4766 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4767 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4768 PCBDDCGraph graph; 4769 PetscErrorCode ierr; 4770 4771 PetscFunctionBegin; 4772 /* attach interface graph for determining subsets */ 4773 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4774 IS verticesIS,verticescomm; 4775 PetscInt vsize,*idxs; 4776 4777 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4778 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4779 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4780 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4781 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4782 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4783 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4784 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4785 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4786 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4787 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4788 /* 4789 if (pcbddc->dbg_flag) { 4790 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4791 } 4792 */ 4793 } else { 4794 graph = pcbddc->mat_graph; 4795 } 4796 4797 /* sub_schurs init */ 4798 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4799 4800 /* free graph struct */ 4801 if (pcbddc->sub_schurs_rebuild) { 4802 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4803 } 4804 PetscFunctionReturn(0); 4805 } 4806